1 //==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that AArch64 uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
15 #define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
16 
17 #include "AArch64.h"
18 #include "Utils/AArch64SMEAttributes.h"
19 #include "llvm/CodeGen/CallingConvLower.h"
20 #include "llvm/CodeGen/MachineFunction.h"
21 #include "llvm/CodeGen/SelectionDAG.h"
22 #include "llvm/CodeGen/TargetLowering.h"
23 #include "llvm/IR/CallingConv.h"
24 #include "llvm/IR/Instruction.h"
25 
26 namespace llvm {
27 
28 namespace AArch64ISD {
29 
30 // For predicated nodes where the result is a vector, the operation is
31 // controlled by a governing predicate and the inactive lanes are explicitly
32 // defined with a value. Please stick to the following naming convention:
33 //
34 //    _MERGE_OP<n>        The result value is a vector with inactive lanes equal
35 //                        to source operand OP<n>.
36 //
37 //    _MERGE_ZERO         The result value is a vector with inactive lanes
38 //                        actively zeroed.
39 //
40 //    _MERGE_PASSTHRU     The result value is a vector with inactive lanes equal
41 //                        to the last source operand, whose only purpose is to
42 //                        provide a passthru value.
43 //
44 // For other cases where no explicit action is needed to set the inactive lanes,
45 // or when the result is not a vector and it is needed or helpful to
46 // distinguish a node from similar unpredicated nodes, use:
47 //
48 //    _PRED
49 //
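// For example (illustrative, referring to nodes defined below): SRAD_MERGE_OP1
// produces a result whose inactive lanes take their value from source operand
// 1, FABS_MERGE_PASSTHRU takes them from its trailing passthru operand,
// SETCC_MERGE_ZERO zeroes them, and FADD_PRED leaves them undefined.
//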
50 enum NodeType : unsigned {
51   FIRST_NUMBER = ISD::BUILTIN_OP_END,
52   WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
53   CALL,         // Function call.
54 
55   // Pseudo for an Objective-C call that gets emitted together with a special `mov
56   // x29, x29` marker instruction.
57   CALL_RVMARKER,
58 
59   CALL_BTI, // Function call followed by a BTI instruction.
60 
61   // Essentially like a normal COPY that works on GPRs, but cannot be
62   // rematerialised by passes like the simple register coalescer. It's
63   // required for SME when lowering calls because we cannot allow frame
64   // index calculations using addvl to slip in between the smstart/smstop
65   // and the bl instruction. The scalable vector length may change across
66   // the smstart/smstop boundary.
67   OBSCURE_COPY,
68   SMSTART,
69   SMSTOP,
70   RESTORE_ZA,
71 
72   // Produces the full sequence of instructions for getting the thread pointer
73   // offset of a variable into X0, using the TLSDesc model.
74   TLSDESC_CALLSEQ,
75   ADRP,     // Page address of a TargetGlobalAddress operand.
76   ADR,      // ADR
77   ADDlow,   // Add the low 12 bits of a TargetGlobalAddress operand.
78   LOADgot,  // Load from automatically generated descriptor (e.g. Global
79             // Offset Table, TLS record).
80   RET_FLAG, // Return with a flag operand. Operand 0 is the chain operand.
81   BRCOND,   // Conditional branch instruction; "b.cond".
82   CSEL,
83   CSINV, // Conditional select invert.
84   CSNEG, // Conditional select negate.
85   CSINC, // Conditional select increment.
86 
87   // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
88   // ELF.
89   THREAD_POINTER,
90   ADC,
91   SBC, // adc, sbc instructions
92 
93   // Predicated instructions where inactive lanes produce undefined results.
94   ABDS_PRED,
95   ABDU_PRED,
96   FADD_PRED,
97   FDIV_PRED,
98   FMA_PRED,
99   FMAX_PRED,
100   FMAXNM_PRED,
101   FMIN_PRED,
102   FMINNM_PRED,
103   FMUL_PRED,
104   FSUB_PRED,
105   HADDS_PRED,
106   HADDU_PRED,
107   MUL_PRED,
108   MULHS_PRED,
109   MULHU_PRED,
110   RHADDS_PRED,
111   RHADDU_PRED,
112   SDIV_PRED,
113   SHL_PRED,
114   SMAX_PRED,
115   SMIN_PRED,
116   SRA_PRED,
117   SRL_PRED,
118   UDIV_PRED,
119   UMAX_PRED,
120   UMIN_PRED,
121 
122   // Unpredicated vector instructions
123   BIC,
124 
125   SRAD_MERGE_OP1,
126 
127   // Predicated instructions with the result of inactive lanes provided by the
128   // last operand.
129   FABS_MERGE_PASSTHRU,
130   FCEIL_MERGE_PASSTHRU,
131   FFLOOR_MERGE_PASSTHRU,
132   FNEARBYINT_MERGE_PASSTHRU,
133   FNEG_MERGE_PASSTHRU,
134   FRECPX_MERGE_PASSTHRU,
135   FRINT_MERGE_PASSTHRU,
136   FROUND_MERGE_PASSTHRU,
137   FROUNDEVEN_MERGE_PASSTHRU,
138   FSQRT_MERGE_PASSTHRU,
139   FTRUNC_MERGE_PASSTHRU,
140   FP_ROUND_MERGE_PASSTHRU,
141   FP_EXTEND_MERGE_PASSTHRU,
142   UINT_TO_FP_MERGE_PASSTHRU,
143   SINT_TO_FP_MERGE_PASSTHRU,
144   FCVTZU_MERGE_PASSTHRU,
145   FCVTZS_MERGE_PASSTHRU,
146   SIGN_EXTEND_INREG_MERGE_PASSTHRU,
147   ZERO_EXTEND_INREG_MERGE_PASSTHRU,
148   ABS_MERGE_PASSTHRU,
149   NEG_MERGE_PASSTHRU,
150 
151   SETCC_MERGE_ZERO,
152 
153   // Arithmetic instructions which write flags.
154   ADDS,
155   SUBS,
156   ADCS,
157   SBCS,
158   ANDS,
159 
160   // Conditional compares. Operands: left, right, falsecc, cc, flags.
161   CCMP,
162   CCMN,
163   FCCMP,
164 
165   // Floating point comparison
166   FCMP,
167 
168   // Scalar extract
169   EXTR,
170 
171   // Scalar-to-vector duplication
172   DUP,
173   DUPLANE8,
174   DUPLANE16,
175   DUPLANE32,
176   DUPLANE64,
177   DUPLANE128,
178 
179   // Vector immediate moves
180   MOVI,
181   MOVIshift,
182   MOVIedit,
183   MOVImsl,
184   FMOV,
185   MVNIshift,
186   MVNImsl,
187 
188   // Vector immediate ops
189   BICi,
190   ORRi,
191 
192   // Vector bitwise select: similar to ISD::VSELECT but not all bits within an
193   // element must be identical.
194   BSP,
195 
196   // Vector shuffles
197   ZIP1,
198   ZIP2,
199   UZP1,
200   UZP2,
201   TRN1,
202   TRN2,
203   REV16,
204   REV32,
205   REV64,
206   EXT,
207   SPLICE,
208 
209   // Vector shift by scalar
210   VSHL,
211   VLSHR,
212   VASHR,
213 
214   // Vector shift by scalar (again)
215   SQSHL_I,
216   UQSHL_I,
217   SQSHLU_I,
218   SRSHR_I,
219   URSHR_I,
220 
221   // Vector shift by constant and insert
222   VSLI,
223   VSRI,
224 
225   // Vector comparisons
226   CMEQ,
227   CMGE,
228   CMGT,
229   CMHI,
230   CMHS,
231   FCMEQ,
232   FCMGE,
233   FCMGT,
234 
235   // Vector zero comparisons
236   CMEQz,
237   CMGEz,
238   CMGTz,
239   CMLEz,
240   CMLTz,
241   FCMEQz,
242   FCMGEz,
243   FCMGTz,
244   FCMLEz,
245   FCMLTz,
246 
247   // Vector across-lanes addition
248   // Only the lower result lane is defined.
249   SADDV,
250   UADDV,
251 
252   // Add Pairwise of two vectors
253   ADDP,
254   // Add Long Pairwise
255   SADDLP,
256   UADDLP,
257 
258   // udot/sdot instructions
259   UDOT,
260   SDOT,
261 
262   // Vector across-lanes min/max
263   // Only the lower result lane is defined.
264   SMINV,
265   UMINV,
266   SMAXV,
267   UMAXV,
268 
269   SADDV_PRED,
270   UADDV_PRED,
271   SMAXV_PRED,
272   UMAXV_PRED,
273   SMINV_PRED,
274   UMINV_PRED,
275   ORV_PRED,
276   EORV_PRED,
277   ANDV_PRED,
278 
279   // Vector bitwise insertion
280   BIT,
281 
282   // Compare-and-branch
283   CBZ,
284   CBNZ,
285   TBZ,
286   TBNZ,
287 
288   // Tail calls
289   TC_RETURN,
290 
291   // Custom prefetch handling
292   PREFETCH,
293 
294   // {s|u}int to FP within a FP register.
295   SITOF,
296   UITOF,
297 
298   /// Natural vector cast. ISD::BITCAST is not natural in the big-endian
299   /// world w.r.t. vectors, which causes additional REV instructions to be
300   /// generated to compensate for the byte-swapping. But sometimes we do
301   /// need to re-interpret the data in SIMD vector registers in big-endian
302   /// mode without emitting such REV instructions.
303   NVCAST,
304 
305   MRS, // MRS, also sets the flags via a glue.
306 
307   SMULL,
308   UMULL,
309 
310   PMULL,
311 
312   // Reciprocal estimates and steps.
313   FRECPE,
314   FRECPS,
315   FRSQRTE,
316   FRSQRTS,
317 
318   SUNPKHI,
319   SUNPKLO,
320   UUNPKHI,
321   UUNPKLO,
322 
323   CLASTA_N,
324   CLASTB_N,
325   LASTA,
326   LASTB,
327   TBL,
328 
329   // Floating-point reductions.
330   FADDA_PRED,
331   FADDV_PRED,
332   FMAXV_PRED,
333   FMAXNMV_PRED,
334   FMINV_PRED,
335   FMINNMV_PRED,
336 
337   INSR,
338   PTEST,
339   PTEST_ANY,
340   PTRUE,
341 
342   BITREVERSE_MERGE_PASSTHRU,
343   BSWAP_MERGE_PASSTHRU,
344   REVH_MERGE_PASSTHRU,
345   REVW_MERGE_PASSTHRU,
346   CTLZ_MERGE_PASSTHRU,
347   CTPOP_MERGE_PASSTHRU,
348   DUP_MERGE_PASSTHRU,
349   INDEX_VECTOR,
350 
351   // Cast between vectors of the same element type that differ in length.
352   REINTERPRET_CAST,
353 
354   // Nodes to build an LD64B / ST64B 64-byte quantity out of i64s, and vice versa.
355   LS64_BUILD,
356   LS64_EXTRACT,
357 
358   LD1_MERGE_ZERO,
359   LD1S_MERGE_ZERO,
360   LDNF1_MERGE_ZERO,
361   LDNF1S_MERGE_ZERO,
362   LDFF1_MERGE_ZERO,
363   LDFF1S_MERGE_ZERO,
364   LD1RQ_MERGE_ZERO,
365   LD1RO_MERGE_ZERO,
366 
367   // Structured loads.
368   SVE_LD2_MERGE_ZERO,
369   SVE_LD3_MERGE_ZERO,
370   SVE_LD4_MERGE_ZERO,
371 
372   // Unsigned gather loads.
373   GLD1_MERGE_ZERO,
374   GLD1_SCALED_MERGE_ZERO,
375   GLD1_UXTW_MERGE_ZERO,
376   GLD1_SXTW_MERGE_ZERO,
377   GLD1_UXTW_SCALED_MERGE_ZERO,
378   GLD1_SXTW_SCALED_MERGE_ZERO,
379   GLD1_IMM_MERGE_ZERO,
380 
381   // Signed gather loads
382   GLD1S_MERGE_ZERO,
383   GLD1S_SCALED_MERGE_ZERO,
384   GLD1S_UXTW_MERGE_ZERO,
385   GLD1S_SXTW_MERGE_ZERO,
386   GLD1S_UXTW_SCALED_MERGE_ZERO,
387   GLD1S_SXTW_SCALED_MERGE_ZERO,
388   GLD1S_IMM_MERGE_ZERO,
389 
390   // First-faulting unsigned gather loads.
391   GLDFF1_MERGE_ZERO,
392   GLDFF1_SCALED_MERGE_ZERO,
393   GLDFF1_UXTW_MERGE_ZERO,
394   GLDFF1_SXTW_MERGE_ZERO,
395   GLDFF1_UXTW_SCALED_MERGE_ZERO,
396   GLDFF1_SXTW_SCALED_MERGE_ZERO,
397   GLDFF1_IMM_MERGE_ZERO,
398 
399   // First-faulting signed gather loads.
400   GLDFF1S_MERGE_ZERO,
401   GLDFF1S_SCALED_MERGE_ZERO,
402   GLDFF1S_UXTW_MERGE_ZERO,
403   GLDFF1S_SXTW_MERGE_ZERO,
404   GLDFF1S_UXTW_SCALED_MERGE_ZERO,
405   GLDFF1S_SXTW_SCALED_MERGE_ZERO,
406   GLDFF1S_IMM_MERGE_ZERO,
407 
408   // Non-temporal gather loads
409   GLDNT1_MERGE_ZERO,
410   GLDNT1_INDEX_MERGE_ZERO,
411   GLDNT1S_MERGE_ZERO,
412 
413   // Contiguous masked store.
414   ST1_PRED,
415 
416   // Scatter store
417   SST1_PRED,
418   SST1_SCALED_PRED,
419   SST1_UXTW_PRED,
420   SST1_SXTW_PRED,
421   SST1_UXTW_SCALED_PRED,
422   SST1_SXTW_SCALED_PRED,
423   SST1_IMM_PRED,
424 
425   // Non-temporal scatter store
426   SSTNT1_PRED,
427   SSTNT1_INDEX_PRED,
428 
429   // SME
430   RDSVL,
431   REVD_MERGE_PASSTHRU,
432 
433   // Asserts that a function argument (i32) is zero-extended to i8 by
434   // the caller
435   ASSERT_ZEXT_BOOL,
436 
437   // 128-bit system register accesses
438   // lo64, hi64, chain = MRRS(chain, sysregname)
439   MRRS,
440   // chain = MSRR(chain, sysregname, lo64, hi64)
441   MSRR,
442 
443   // Strict (exception-raising) floating point comparison
444   STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
445   STRICT_FCMPE,
446 
447   // NEON Load/Store with post-increment base updates
448   LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
449   LD3post,
450   LD4post,
451   ST2post,
452   ST3post,
453   ST4post,
454   LD1x2post,
455   LD1x3post,
456   LD1x4post,
457   ST1x2post,
458   ST1x3post,
459   ST1x4post,
460   LD1DUPpost,
461   LD2DUPpost,
462   LD3DUPpost,
463   LD4DUPpost,
464   LD1LANEpost,
465   LD2LANEpost,
466   LD3LANEpost,
467   LD4LANEpost,
468   ST2LANEpost,
469   ST3LANEpost,
470   ST4LANEpost,
471 
472   STG,
473   STZG,
474   ST2G,
475   STZ2G,
476 
477   LDP,
478   LDNP,
479   STP,
480   STNP,
481 
482   // Memory Operations
483   MOPS_MEMSET,
484   MOPS_MEMSET_TAGGING,
485   MOPS_MEMCOPY,
486   MOPS_MEMMOVE,
487 };
488 
489 } // end namespace AArch64ISD
490 
491 namespace AArch64 {
492 /// Possible values of current rounding mode, which is specified in bits
493 /// 23:22 of FPCR.
494 enum Rounding {
495   RN = 0,    // Round to Nearest
496   RP = 1,    // Round towards Plus infinity
497   RM = 2,    // Round towards Minus infinity
498   RZ = 3,    // Round towards Zero
499   rmMask = 3 // Bit mask selecting rounding mode
500 };
501 
502 // Bit position of rounding mode bits in FPCR.
503 const unsigned RoundingBitsPos = 22;
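
// A minimal illustration (FPCR here is an assumed variable holding the value
// read from the FPCR register): the current rounding mode can be recovered as
//   unsigned RMode = (FPCR >> AArch64::RoundingBitsPos) & AArch64::rmMask;
// which yields one of the Rounding values above.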
504 } // namespace AArch64
505 
506 class AArch64Subtarget;
507 
508 class AArch64TargetLowering : public TargetLowering {
509 public:
510   explicit AArch64TargetLowering(const TargetMachine &TM,
511                                  const AArch64Subtarget &STI);
512 
513   /// Control the following reassociation of operands: (op (op x, c1), y) -> (op
514   /// (op x, y), c1) where N0 is (op x, c1) and N1 is y.
515   bool isReassocProfitable(SelectionDAG &DAG, SDValue N0,
516                            SDValue N1) const override;
517 
518   /// Selects the correct CCAssignFn for a given CallingConvention value.
519   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
520 
521   /// Selects the correct CCAssignFn for a given CallingConvention value.
522   CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;
523 
524   /// Determine which of the bits specified in Mask are known to be either zero
525   /// or one and return them in the KnownZero/KnownOne bitsets.
526   void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
527                                      const APInt &DemandedElts,
528                                      const SelectionDAG &DAG,
529                                      unsigned Depth = 0) const override;
530 
531   MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
532     // Returning i64 unconditionally here (i.e. even for ILP32) means that the
533     // *DAG* representation of pointers will always be 64 bits. They will be
534     // truncated and extended when transferred to memory, but the 64-bit DAG
535     // allows us to use AArch64's addressing modes much more easily.
536     return MVT::getIntegerVT(64);
537   }
538 
539   bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
540                                     const APInt &DemandedElts,
541                                     TargetLoweringOpt &TLO) const override;
542 
543   MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;
544 
545   /// Returns true if the target allows unaligned memory accesses of the
546   /// specified type.
547   bool allowsMisalignedMemoryAccesses(
548       EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
549       MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
550       unsigned *Fast = nullptr) const override;
551   /// LLT variant.
552   bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
553                                       Align Alignment,
554                                       MachineMemOperand::Flags Flags,
555                                       unsigned *Fast = nullptr) const override;
556 
557   /// Provide custom lowering hooks for some operations.
558   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
559 
560   const char *getTargetNodeName(unsigned Opcode) const override;
561 
562   SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
563 
564   /// This method returns a target specific FastISel object, or null if the
565   /// target does not support "fast" ISel.
566   FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
567                            const TargetLibraryInfo *libInfo) const override;
568 
569   bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
570 
571   bool isFPImmLegal(const APFloat &Imm, EVT VT,
572                     bool ForCodeSize) const override;
573 
574   /// Return true if the given shuffle mask can be codegen'd directly, or if it
575   /// should be stack expanded.
576   bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;
577 
578   /// Similar to isShuffleMaskLegal. Return true if the given 'select with zero'
579   /// shuffle mask can be codegen'd directly.
580   bool isVectorClearMaskLegal(ArrayRef<int> M, EVT VT) const override;
581 
582   /// Return the ISD::SETCC ValueType.
583   EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
584                          EVT VT) const override;
585 
586   SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
587 
588   MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
589                                   MachineBasicBlock *BB) const;
590 
591   MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
592                                            MachineBasicBlock *BB) const;
593 
594   MachineBasicBlock *EmitTileLoad(unsigned Opc, unsigned BaseReg,
595                                   MachineInstr &MI,
596                                   MachineBasicBlock *BB) const;
597   MachineBasicBlock *EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const;
598   MachineBasicBlock *EmitZAInstr(unsigned Opc, unsigned BaseReg,
599                                  MachineInstr &MI, MachineBasicBlock *BB,
600                                  bool HasTile) const;
601   MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const;
602 
603   MachineBasicBlock *
604   EmitInstrWithCustomInserter(MachineInstr &MI,
605                               MachineBasicBlock *MBB) const override;
606 
607   bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
608                           MachineFunction &MF,
609                           unsigned Intrinsic) const override;
610 
611   bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
612                              EVT NewVT) const override;
613 
614   bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
615   bool isTruncateFree(EVT VT1, EVT VT2) const override;
616 
617   bool isProfitableToHoist(Instruction *I) const override;
618 
619   bool isZExtFree(Type *Ty1, Type *Ty2) const override;
620   bool isZExtFree(EVT VT1, EVT VT2) const override;
621   bool isZExtFree(SDValue Val, EVT VT2) const override;
622 
623   bool shouldSinkOperands(Instruction *I,
624                           SmallVectorImpl<Use *> &Ops) const override;
625 
626   bool optimizeExtendOrTruncateConversion(Instruction *I,
627                                           Loop *L) const override;
628 
629   bool hasPairedLoad(EVT LoadedType, Align &RequiredAlignment) const override;
630 
631   unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
632 
633   bool lowerInterleavedLoad(LoadInst *LI,
634                             ArrayRef<ShuffleVectorInst *> Shuffles,
635                             ArrayRef<unsigned> Indices,
636                             unsigned Factor) const override;
637   bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
638                              unsigned Factor) const override;
639 
640   bool isLegalAddImmediate(int64_t) const override;
641   bool isLegalICmpImmediate(int64_t) const override;
642 
643   bool isMulAddWithConstProfitable(SDValue AddNode,
644                                    SDValue ConstNode) const override;
645 
646   bool shouldConsiderGEPOffsetSplit() const override;
647 
648   EVT getOptimalMemOpType(const MemOp &Op,
649                           const AttributeList &FuncAttributes) const override;
650 
651   LLT getOptimalMemOpLLT(const MemOp &Op,
652                          const AttributeList &FuncAttributes) const override;
653 
654   /// Return true if the addressing mode represented by AM is legal for this
655   /// target, for a load/store of the specified type.
656   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
657                              unsigned AS,
658                              Instruction *I = nullptr) const override;
659 
660   /// Return true if an FMA operation is faster than a pair of fmul and fadd
661   /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
662   /// returns true, otherwise fmuladd is expanded to fmul + fadd.
663   bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
664                                   EVT VT) const override;
665   bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;
666 
667   bool generateFMAsInMachineCombiner(EVT VT,
668                                      CodeGenOpt::Level OptLevel) const override;
669 
670   const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
671 
672   /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
673   bool isDesirableToCommuteWithShift(const SDNode *N,
674                                      CombineLevel Level) const override;
675 
676   /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
677   bool isDesirableToCommuteXorWithShift(const SDNode *N) const override;
678 
679   /// Return true if it is profitable to fold a pair of shifts into a mask.
680   bool shouldFoldConstantShiftPairToMask(const SDNode *N,
681                                          CombineLevel Level) const override;
682 
683   /// Returns true if it is beneficial to convert a load of a constant
684   /// to just the constant itself.
685   bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
686                                          Type *Ty) const override;
687 
688   /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
689   /// with this index.
690   bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
691                                unsigned Index) const override;
692 
693   bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
694                             bool MathUsed) const override {
695     // Using overflow ops for overflow checks only should be beneficial on
696     // AArch64.
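    // For instance, even when only the overflow bit of an
    // @llvm.sadd.with.overflow.i32 call is used, the operation still maps to
    // ADDS plus a B.VS (or CSET on the V flag), so forming the overflow op is
    // worthwhile.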
697     return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
698   }
699 
700   Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
701                         AtomicOrdering Ord) const override;
702   Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
703                               AtomicOrdering Ord) const override;
704 
705   void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override;
706 
707   bool isOpSuitableForLDPSTP(const Instruction *I) const;
708   bool shouldInsertFencesForAtomic(const Instruction *I) const override;
709   bool
710   shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const override;
711 
712   TargetLoweringBase::AtomicExpansionKind
713   shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
714   TargetLoweringBase::AtomicExpansionKind
715   shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
716   TargetLoweringBase::AtomicExpansionKind
717   shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
718 
719   TargetLoweringBase::AtomicExpansionKind
720   shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
721 
722   bool useLoadStackGuardNode() const override;
723   TargetLoweringBase::LegalizeTypeAction
724   getPreferredVectorAction(MVT VT) const override;
725 
726   /// If the target has a standard location for the stack protector cookie,
727   /// returns the address of that location. Otherwise, returns nullptr.
728   Value *getIRStackGuard(IRBuilderBase &IRB) const override;
729 
730   void insertSSPDeclarations(Module &M) const override;
731   Value *getSDagStackGuard(const Module &M) const override;
732   Function *getSSPStackGuardCheck(const Module &M) const override;
733 
734   /// If the target has a standard location for the unsafe stack pointer,
735   /// returns the address of that location. Otherwise, returns nullptr.
736   Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;
737 
738   /// If a physical register, this returns the register that receives the
739   /// exception address on entry to an EH pad.
740   Register
741   getExceptionPointerRegister(const Constant *PersonalityFn) const override {
742     // FIXME: This is a guess. Has this been defined yet?
743     return AArch64::X0;
744   }
745 
746   /// If a physical register, this returns the register that receives the
747   /// exception typeid on entry to a landing pad.
748   Register
749   getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
750     // FIXME: This is a guess. Has this been defined yet?
751     return AArch64::X1;
752   }
753 
754   bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
755 
756   bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
757                         const MachineFunction &MF) const override {
758     // Do not merge stores into a 128-bit (FP/vector) value if the
759     // NoImplicitFloat attribute is set.
760 
761     bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);
762 
763     if (NoFloat)
764       return (MemVT.getSizeInBits() <= 64);
765     return true;
766   }
767 
768   bool isCheapToSpeculateCttz(Type *) const override {
769     return true;
770   }
771 
772   bool isCheapToSpeculateCtlz(Type *) const override {
773     return true;
774   }
775 
776   bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
777 
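  // Illustrative note: the two hooks below describe "and-not" patterns such as
  //   (and X, (xor Y, -1))
  // which AArch64 can select to the scalar BIC/BICS instructions or, for
  // 64/128-bit NEON types, to a vector BIC.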
778   bool hasAndNotCompare(SDValue V) const override {
779     // We can use bics for any scalar.
780     return V.getValueType().isScalarInteger();
781   }
782 
783   bool hasAndNot(SDValue Y) const override {
784     EVT VT = Y.getValueType();
785 
786     if (!VT.isVector())
787       return hasAndNotCompare(Y);
788 
789     TypeSize TS = VT.getSizeInBits();
790     // TODO: We should be able to use bic/bif too for SVE.
791     return !TS.isScalable() && TS.getFixedValue() >= 64; // vector 'bic'
792   }
793 
794   bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
795       SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
796       unsigned OldShiftOpcode, unsigned NewShiftOpcode,
797       SelectionDAG &DAG) const override;
798 
799   ShiftLegalizationStrategy
800   preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
801                                      unsigned ExpansionFactor) const override;
802 
803   bool shouldTransformSignedTruncationCheck(EVT XVT,
804                                             unsigned KeptBits) const override {
805     // For vectors, we don't have a preference.
806     if (XVT.isVector())
807       return false;
808 
809     auto VTIsOk = [](EVT VT) -> bool {
810       return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
811              VT == MVT::i64;
812     };
813 
814     // We are ok with KeptBitsVT being byte/word/dword, which is what SXT supports.
815     // XVT will be larger than KeptBitsVT.
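    // Illustrative example (assuming the generic combine guarded by this
    // hook): for XVT == i64 and KeptBits == 16, returning true lets the check
    // be rewritten as (sext_inreg %x, i16) == %x, which lowers to SXTH plus a
    // compare.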
816     MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
817     return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
818   }
819 
820   bool preferIncOfAddToSubOfNot(EVT VT) const override;
821 
822   bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;
823 
824   bool isComplexDeinterleavingSupported() const override;
825   bool isComplexDeinterleavingOperationSupported(
826       ComplexDeinterleavingOperation Operation, Type *Ty) const override;
827 
828   Value *createComplexDeinterleavingIR(
829       Instruction *I, ComplexDeinterleavingOperation OperationType,
830       ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
831       Value *Accumulator = nullptr) const override;
832 
833   bool hasBitPreservingFPLogic(EVT VT) const override {
834     // FIXME: Is this always true? It should be true for vectors at least.
835     return VT == MVT::f32 || VT == MVT::f64;
836   }
837 
838   bool supportSplitCSR(MachineFunction *MF) const override {
839     return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
840            MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
841   }
842   void initializeSplitCSR(MachineBasicBlock *Entry) const override;
843   void insertCopiesSplitCSR(
844       MachineBasicBlock *Entry,
845       const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
846 
847   bool supportSwiftError() const override {
848     return true;
849   }
850 
851   bool supportKCFIBundles() const override { return true; }
852 
853   /// Enable aggressive FMA fusion on targets that want it.
854   bool enableAggressiveFMAFusion(EVT VT) const override;
855 
856   /// Returns the size of the platform's va_list object.
857   unsigned getVaListSizeInBits(const DataLayout &DL) const override;
858 
859   /// Returns true if \p VecTy is a legal interleaved access type. This
860   /// function checks the vector element type and the overall width of the
861   /// vector.
862   bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL,
863                                     bool &UseScalable) const;
864 
865   /// Returns the number of interleaved accesses that will be generated when
866   /// lowering accesses of the given type.
867   unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL,
868                                      bool UseScalable) const;
869 
870   MachineMemOperand::Flags getTargetMMOFlags(
871     const Instruction &I) const override;
872 
873   bool functionArgumentNeedsConsecutiveRegisters(
874       Type *Ty, CallingConv::ID CallConv, bool isVarArg,
875       const DataLayout &DL) const override;
876 
877   /// Used for exception handling on Win64.
878   bool needsFixedCatchObjects() const override;
879 
880   bool fallBackToDAGISel(const Instruction &Inst) const override;
881 
882   /// SVE code generation for fixed length vectors does not custom lower
883   /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to
884   /// merge. However, merging them creates a BUILD_VECTOR that is just as
885   /// illegal as the original, thus leading to an infinite legalisation loop.
886   /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
887   /// vector types this override can be removed.
888   bool mergeStoresAfterLegalization(EVT VT) const override;
889 
890   // If the platform/function should have a redzone, return the size in bytes.
891   unsigned getRedZoneSize(const Function &F) const {
892     if (F.hasFnAttribute(Attribute::NoRedZone))
893       return 0;
894     return 128;
895   }
896 
897   bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const;
898   EVT getPromotedVTForPredicate(EVT VT) const;
899 
900   EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
901                              bool AllowUnknown = false) const override;
902 
903   bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const override;
904 
905   /// If a change in streaming mode is required on entry to/return from a
906   /// function call, it emits and returns the corresponding SMSTART or SMSTOP node.
907   /// \p Entry tells whether this is before/after the Call, which is necessary
908   /// because PSTATE.SM is only queried once.
909   SDValue changeStreamingMode(SelectionDAG &DAG, SDLoc DL, bool Enable,
910                               SDValue Chain, SDValue InFlag,
911                               SDValue PStateSM, bool Entry) const;
912 
913   bool isVScaleKnownToBeAPowerOfTwo() const override;
914 
915   // Normally SVE is only used for byte size vectors that do not fit within a
916   // NEON vector. This changes when OverrideNEON is true, allowing SVE to be
917   // used for 64bit and 128bit vectors as well.
918   bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;
919 
920 private:
921   /// Keep a pointer to the AArch64Subtarget around so that we can
922   /// make the right decision when generating code for different targets.
923   const AArch64Subtarget *Subtarget;
924 
925   bool isExtFreeImpl(const Instruction *Ext) const override;
926 
927   void addTypeForNEON(MVT VT);
928   void addTypeForStreamingSVE(MVT VT);
929   void addTypeForFixedLengthSVE(MVT VT);
930   void addDRTypeForNEON(MVT VT);
931   void addQRTypeForNEON(MVT VT);
932 
933   unsigned allocateLazySaveBuffer(SDValue &Chain, const SDLoc &DL,
934                                   SelectionDAG &DAG) const;
935 
936   SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
937                                bool isVarArg,
938                                const SmallVectorImpl<ISD::InputArg> &Ins,
939                                const SDLoc &DL, SelectionDAG &DAG,
940                                SmallVectorImpl<SDValue> &InVals) const override;
941 
942   SDValue LowerCall(CallLoweringInfo & /*CLI*/,
943                     SmallVectorImpl<SDValue> &InVals) const override;
944 
945   SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
946                           CallingConv::ID CallConv, bool isVarArg,
947                           const SmallVectorImpl<CCValAssign> &RVLocs,
948                           const SDLoc &DL, SelectionDAG &DAG,
949                           SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
950                           SDValue ThisVal) const;
951 
952   SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
953   SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
954   SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const;
955   SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
956 
957   SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
958   SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;
959 
960   SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const;
961 
962   SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
963   SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
964   SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
965 
966   bool
967   isEligibleForTailCallOptimization(const CallLoweringInfo &CLI) const;
968 
969   /// Finds the incoming stack arguments which overlap the given fixed stack
970   /// object and incorporates their load into the current chain. This prevents
971   /// an upcoming store from clobbering the stack argument before it's used.
972   SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
973                               MachineFrameInfo &MFI, int ClobberedFI) const;
974 
975   bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;
976 
977   void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
978                            SDValue &Chain) const;
979 
980   bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
981                       bool isVarArg,
982                       const SmallVectorImpl<ISD::OutputArg> &Outs,
983                       LLVMContext &Context) const override;
984 
985   SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
986                       const SmallVectorImpl<ISD::OutputArg> &Outs,
987                       const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
988                       SelectionDAG &DAG) const override;
989 
990   SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
991                         unsigned Flag) const;
992   SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
993                         unsigned Flag) const;
994   SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
995                         unsigned Flag) const;
996   SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
997                         unsigned Flag) const;
998   template <class NodeTy>
999   SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
1000   template <class NodeTy>
1001   SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
1002   template <class NodeTy>
1003   SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
1004   template <class NodeTy>
1005   SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
1006   SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1007   SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1008   SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1009   SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1010   SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1011   SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase,
1012                                const SDLoc &DL, SelectionDAG &DAG) const;
1013   SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
1014                                  SelectionDAG &DAG) const;
1015   SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1016   SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
1017   SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
1018   SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
1019   SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
1020   SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
1021   SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
1022                          SDValue TVal, SDValue FVal, const SDLoc &dl,
1023                          SelectionDAG &DAG) const;
1024   SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
1025   SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
1026   SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1027   SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1028   SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
1029   SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
1030   SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
1031   SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
1032   SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
1033   SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
1034   SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
1035   SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
1036   SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1037   SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
1038   SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
1039   SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1040   SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1041   SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1042   SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1043   SDValue LowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
1044   SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
1045   SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1046   SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
1047   SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG,
1048                               unsigned NewOp) const;
1049   SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
1050   SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
1051   SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
1052   SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
1053   SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
1054   SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
1055   SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
1056   SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
1057   SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
1058   SDValue LowerCTPOP_PARITY(SDValue Op, SelectionDAG &DAG) const;
1059   SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
1060   SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const;
1061   SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
1062   SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
1063   SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
1064   SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
1065   SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1066   SDValue LowerVectorFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
1067   SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1068   SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
1069   SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1070   SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1071   SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
1072   SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
1073   SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
1074   SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
1075   SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
1076   SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
1077   SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
1078   SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
1079   SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
1080   SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
1081   SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1082   SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain,
1083                                          SDValue &Size,
1084                                          SelectionDAG &DAG) const;
1085 
1086   SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
1087                                                SelectionDAG &DAG) const;
1088   SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
1089                                                SelectionDAG &DAG) const;
1090   SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
1091   SDValue LowerFixedLengthVectorMLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
1092   SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
1093   SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
1094   SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
1095                               SelectionDAG &DAG) const;
1096   SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;
1097   SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;
1098   SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
1099   SDValue LowerFixedLengthVectorMStoreToSVE(SDValue Op,
1100                                             SelectionDAG &DAG) const;
1101   SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
1102                                               SelectionDAG &DAG) const;
1103   SDValue LowerFixedLengthExtractVectorElt(SDValue Op, SelectionDAG &DAG) const;
1104   SDValue LowerFixedLengthInsertVectorElt(SDValue Op, SelectionDAG &DAG) const;
1105   SDValue LowerFixedLengthBitcastToSVE(SDValue Op, SelectionDAG &DAG) const;
1106   SDValue LowerFixedLengthConcatVectorsToSVE(SDValue Op,
1107                                              SelectionDAG &DAG) const;
1108   SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const;
1109   SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const;
1110   SDValue LowerFixedLengthIntToFPToSVE(SDValue Op, SelectionDAG &DAG) const;
1111   SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const;
1112   SDValue LowerFixedLengthVECTOR_SHUFFLEToSVE(SDValue Op,
1113                                               SelectionDAG &DAG) const;
1114 
1115   SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
1116                         SmallVectorImpl<SDNode *> &Created) const override;
1117   SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
1118                         SmallVectorImpl<SDNode *> &Created) const override;
1119   SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1120                           int &ExtraSteps, bool &UseOneConst,
1121                           bool Reciprocal) const override;
1122   SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1123                            int &ExtraSteps) const override;
1124   SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
1125                            const DenormalMode &Mode) const override;
1126   SDValue getSqrtResultForDenormInput(SDValue Operand,
1127                                       SelectionDAG &DAG) const override;
1128   unsigned combineRepeatedFPDivisors() const override;
1129 
1130   ConstraintType getConstraintType(StringRef Constraint) const override;
1131   Register getRegisterByName(const char* RegName, LLT VT,
1132                              const MachineFunction &MF) const override;
1133 
1134   /// Examine constraint string and operand type and determine a weight value.
1135   /// The operand object must already have been set up with the operand type.
1136   ConstraintWeight
1137   getSingleConstraintMatchWeight(AsmOperandInfo &info,
1138                                  const char *constraint) const override;
1139 
1140   std::pair<unsigned, const TargetRegisterClass *>
1141   getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
1142                                StringRef Constraint, MVT VT) const override;
1143 
1144   const char *LowerXConstraint(EVT ConstraintVT) const override;
1145 
1146   void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
1147                                     std::vector<SDValue> &Ops,
1148                                     SelectionDAG &DAG) const override;
1149 
1150   unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
1151     if (ConstraintCode == "Q")
1152       return InlineAsm::Constraint_Q;
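    // Illustrative usage (V and Ptr are assumed names): "Q" accepts a memory
    // operand addressed by a single base register with no offset, e.g.
    //   asm volatile("ldxr %0, %1" : "=r"(V) : "Q"(*Ptr));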
1153     // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
1154     //        followed by llvm_unreachable so we'll leave them unimplemented in
1155     //        the backend for now.
1156     return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
1157   }
1158 
1159   bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override;
1160   bool shouldRemoveExtendFromGSIndex(EVT IndexVT, EVT DataVT) const override;
1161   bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
1162   bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1163   bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1164   bool getIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
1165                               SDValue &Offset, ISD::MemIndexedMode &AM,
1166                               bool &IsInc, SelectionDAG &DAG) const;
1167   bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
1168                                  ISD::MemIndexedMode &AM,
1169                                  SelectionDAG &DAG) const override;
1170   bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
1171                                   SDValue &Offset, ISD::MemIndexedMode &AM,
1172                                   SelectionDAG &DAG) const override;
1173 
1174   void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
1175                           SelectionDAG &DAG) const override;
1176   void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
1177                              SelectionDAG &DAG) const;
1178   void ReplaceExtractSubVectorResults(SDNode *N,
1179                                       SmallVectorImpl<SDValue> &Results,
1180                                       SelectionDAG &DAG) const;
1181 
1182   bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;
1183 
1184   void finalizeLowering(MachineFunction &MF) const override;
1185 
1186   bool shouldLocalize(const MachineInstr &MI,
1187                       const TargetTransformInfo *TTI) const override;
1188 
1189   bool SimplifyDemandedBitsForTargetNode(SDValue Op,
1190                                          const APInt &OriginalDemandedBits,
1191                                          const APInt &OriginalDemandedElts,
1192                                          KnownBits &Known,
1193                                          TargetLoweringOpt &TLO,
1194                                          unsigned Depth) const override;
1195 
1196   bool isTargetCanonicalConstantNode(SDValue Op) const override;
1197 
1198   // With the exception of data-predicate transitions, no instructions are
1199   // required to cast between legal scalable vector types. However:
1200   //  1. Packed and unpacked types have different bit lengths, meaning BITCAST
1201   //     is not universally usable.
1202   //  2. Most unpacked integer types are not legal and thus integer extends
1203   //     cannot be used to convert between unpacked and packed types.
1204   // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used
1205   // to transition between unpacked and packed types of the same element type,
1206   // with BITCAST used otherwise.
1207   // This function does not handle predicate bitcasts.
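  // For example (illustrative): unpacked nxv2f32 and packed nxv4f32 share an
  // element type but differ in bit length, so REINTERPRET_CAST is used between
  // them, whereas a cast such as nxv4f32 <-> nxv4i32 can use a plain BITCAST.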
1208   SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;
1209 
1210   // Returns the runtime value for PSTATE.SM. When the function is streaming-
1211   // compatible, this generates a call to __arm_sme_state.
1212   SDValue getPStateSM(SelectionDAG &DAG, SDValue Chain, SMEAttrs Attrs,
1213                       SDLoc DL, EVT VT) const;
1214 
1215   bool isConstantUnsignedBitfieldExtractLegal(unsigned Opc, LLT Ty1,
1216                                               LLT Ty2) const override;
1217 };
1218 
1219 namespace AArch64 {
1220 FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1221                          const TargetLibraryInfo *libInfo);
1222 } // end namespace AArch64
1223 
1224 } // end namespace llvm
1225 
1226 #endif
1227