1 //==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that AArch64 uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
15 #define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
16 
17 #include "AArch64.h"
18 #include "llvm/CodeGen/CallingConvLower.h"
19 #include "llvm/CodeGen/SelectionDAG.h"
20 #include "llvm/CodeGen/TargetLowering.h"
21 #include "llvm/IR/CallingConv.h"
22 #include "llvm/IR/Instruction.h"
23 
24 namespace llvm {
25 
26 namespace AArch64ISD {
27 
28 // For predicated nodes where the result is a vector, the operation is
29 // controlled by a governing predicate and the inactive lanes are explicitly
30 // defined with a value, please stick to the following naming convention:
31 //
32 //    _MERGE_OP<n>        The result value is a vector with inactive lanes equal
33 //                        to source operand OP<n>.
34 //
35 //    _MERGE_ZERO         The result value is a vector with inactive lanes
36 //                        actively zeroed.
37 //
38 //    _MERGE_PASSTHRU     The result value is a vector with inactive lanes equal
39 //                        to the last source operand, whose only purpose is to
40 //                        provide a passthru value.
41 //
42 // For other cases where no explicit action is needed to set the inactive lanes,
43 // or when the result is not a vector and it is needed or helpful to
44 // distinguish a node from similar unpredicated nodes, use:
45 //
46 //    _PRED
47 //
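// For illustration (a sketch of the convention, not a list of the nodes
// declared below; FADD_MERGE_OP1 is a hypothetical name used only as an
// example):
//
//    FADD_PRED            Predicated FADD; inactive lanes are undefined.
//    FADD_MERGE_OP1       Predicated FADD; inactive lanes take the value of
//                         source operand 1.
//    FABS_MERGE_PASSTHRU  Predicated FABS; inactive lanes take the value of
//                         the trailing passthru operand.
//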
48 enum NodeType : unsigned {
49   FIRST_NUMBER = ISD::BUILTIN_OP_END,
50   WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
51   CALL,         // Function call.
52 
53   // Produces the full sequence of instructions for getting the thread pointer
54   // offset of a variable into X0, using the TLSDesc model.
55   TLSDESC_CALLSEQ,
56   ADRP,     // Page address of a TargetGlobalAddress operand.
57   ADR,      // PC-relative address computation (ADR).
58   ADDlow,   // Add the low 12 bits of a TargetGlobalAddress operand.
59   LOADgot,  // Load from automatically generated descriptor (e.g. Global
60             // Offset Table, TLS record).
61   RET_FLAG, // Return with a flag operand. Operand 0 is the chain operand.
62   BRCOND,   // Conditional branch instruction; "b.cond".
63   CSEL,
64   FCSEL, // Conditional move instruction.
65   CSINV, // Conditional select invert.
66   CSNEG, // Conditional select negate.
67   CSINC, // Conditional select increment.
68 
69   // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
70   // ELF.
71   THREAD_POINTER,
72   ADC,
73   SBC, // adc, sbc instructions
74 
75   // Predicated instructions where inactive lanes produce undefined results.
76   ADD_PRED,
77   FADD_PRED,
78   FDIV_PRED,
79   FMA_PRED,
80   FMAXNM_PRED,
81   FMINNM_PRED,
82   FMUL_PRED,
83   FSUB_PRED,
84   MUL_PRED,
85   SDIV_PRED,
86   SHL_PRED,
87   SMAX_PRED,
88   SMIN_PRED,
89   SRA_PRED,
90   SRL_PRED,
91   SUB_PRED,
92   UDIV_PRED,
93   UMAX_PRED,
94   UMIN_PRED,
95 
96   // Predicated instructions with the result of inactive lanes provided by the
97   // last operand.
98   FABS_MERGE_PASSTHRU,
99   FCEIL_MERGE_PASSTHRU,
100   FFLOOR_MERGE_PASSTHRU,
101   FNEARBYINT_MERGE_PASSTHRU,
102   FNEG_MERGE_PASSTHRU,
103   FRECPX_MERGE_PASSTHRU,
104   FRINT_MERGE_PASSTHRU,
105   FROUND_MERGE_PASSTHRU,
106   FROUNDEVEN_MERGE_PASSTHRU,
107   FSQRT_MERGE_PASSTHRU,
108   FTRUNC_MERGE_PASSTHRU,
109   FP_ROUND_MERGE_PASSTHRU,
110   FP_EXTEND_MERGE_PASSTHRU,
111   UINT_TO_FP_MERGE_PASSTHRU,
112   SINT_TO_FP_MERGE_PASSTHRU,
113   FCVTZU_MERGE_PASSTHRU,
114   FCVTZS_MERGE_PASSTHRU,
115   SIGN_EXTEND_INREG_MERGE_PASSTHRU,
116   ZERO_EXTEND_INREG_MERGE_PASSTHRU,
117   ABS_MERGE_PASSTHRU,
118   NEG_MERGE_PASSTHRU,
119 
120   SETCC_MERGE_ZERO,
121 
122   // Arithmetic instructions which write flags.
123   ADDS,
124   SUBS,
125   ADCS,
126   SBCS,
127   ANDS,
128 
129   // Conditional compares. Operands: left, right, falsecc, cc, flags.
130   CCMP,
131   CCMN,
132   FCCMP,
133 
134   // Floating point comparison
135   FCMP,
136 
137   // Scalar extract
138   EXTR,
139 
140   // Scalar-to-vector duplication
141   DUP,
142   DUPLANE8,
143   DUPLANE16,
144   DUPLANE32,
145   DUPLANE64,
146 
147   // Vector immediate moves
148   MOVI,
149   MOVIshift,
150   MOVIedit,
151   MOVImsl,
152   FMOV,
153   MVNIshift,
154   MVNImsl,
155 
156   // Vector immediate ops
157   BICi,
158   ORRi,
159 
160   // Vector bitwise select: similar to ISD::VSELECT but the selection is per
161   // bit, so the mask bits within an element need not all be identical.
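  // A sketch of the intended semantics, assuming the operand order
  // (mask, lhs, rhs): each result bit comes from lhs where the corresponding
  // mask bit is set and from rhs otherwise, i.e.
  //   result = (mask & lhs) | (~mask & rhs)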
162   BSP,
163 
164   // Vector arithmetic negation
165   NEG,
166 
167   // Vector shuffles
168   ZIP1,
169   ZIP2,
170   UZP1,
171   UZP2,
172   TRN1,
173   TRN2,
174   REV16,
175   REV32,
176   REV64,
177   EXT,
178 
179   // Vector shift by scalar
180   VSHL,
181   VLSHR,
182   VASHR,
183 
184   // Vector saturating/rounding shifts by immediate
185   SQSHL_I,
186   UQSHL_I,
187   SQSHLU_I,
188   SRSHR_I,
189   URSHR_I,
190 
191   // Vector shift by constant and insert
192   VSLI,
193   VSRI,
194 
195   // Vector comparisons
196   CMEQ,
197   CMGE,
198   CMGT,
199   CMHI,
200   CMHS,
201   FCMEQ,
202   FCMGE,
203   FCMGT,
204 
205   // Vector zero comparisons
206   CMEQz,
207   CMGEz,
208   CMGTz,
209   CMLEz,
210   CMLTz,
211   FCMEQz,
212   FCMGEz,
213   FCMGTz,
214   FCMLEz,
215   FCMLTz,
216 
217   // Vector across-lanes addition
218   // Only the lower result lane is defined.
219   SADDV,
220   UADDV,
221 
222   // Vector halving addition
223   SHADD,
224   UHADD,
225 
226   // Vector rounding halving addition
227   SRHADD,
228   URHADD,
229 
230   // Absolute difference
231   UABD,
232   SABD,
233 
234   // Vector across-lanes min/max
235   // Only the lower result lane is defined.
236   SMINV,
237   UMINV,
238   SMAXV,
239   UMAXV,
240 
241   SADDV_PRED,
242   UADDV_PRED,
243   SMAXV_PRED,
244   UMAXV_PRED,
245   SMINV_PRED,
246   UMINV_PRED,
247   ORV_PRED,
248   EORV_PRED,
249   ANDV_PRED,
250 
251   // Vector bitwise insertion
252   BIT,
253 
254   // Compare-and-branch
255   CBZ,
256   CBNZ,
257   TBZ,
258   TBNZ,
259 
260   // Tail calls
261   TC_RETURN,
262 
263   // Custom prefetch handling
264   PREFETCH,
265 
266   // {s|u}int to FP within a FP register.
267   SITOF,
268   UITOF,
269 
270   /// Natural vector cast. ISD::BITCAST is not natural in the big-endian
271   /// world w.r.t. vectors, which causes additional REV instructions to be
272   /// generated to compensate for the byte-swapping. But sometimes we do
273   /// need to re-interpret the data in SIMD vector registers in big-endian
274   /// mode without emitting such REV instructions.
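  /// For example (illustrative): re-viewing a v4i32 register as v2i64 in
  /// big-endian mode can be done with NVCAST, which keeps the register bits
  /// as-is, whereas a plain BITCAST has to honour the in-memory lane order
  /// and may therefore require a REV.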
275   NVCAST,
276 
277   SMULL,
278   UMULL,
279 
280   // Reciprocal estimates and steps.
281   FRECPE,
282   FRECPS,
283   FRSQRTE,
284   FRSQRTS,
285 
286   SUNPKHI,
287   SUNPKLO,
288   UUNPKHI,
289   UUNPKLO,
290 
291   CLASTA_N,
292   CLASTB_N,
293   LASTA,
294   LASTB,
295   REV,
296   TBL,
297 
298   // Floating-point reductions.
299   FADDA_PRED,
300   FADDV_PRED,
301   FMAXV_PRED,
302   FMAXNMV_PRED,
303   FMINV_PRED,
304   FMINNMV_PRED,
305 
306   INSR,
307   PTEST,
308   PTRUE,
309 
310   BITREVERSE_MERGE_PASSTHRU,
311   BSWAP_MERGE_PASSTHRU,
312   CTLZ_MERGE_PASSTHRU,
313   CTPOP_MERGE_PASSTHRU,
314   DUP_MERGE_PASSTHRU,
315   INDEX_VECTOR,
316 
317   // Cast between vectors of the same element type that differ in length.
318   REINTERPRET_CAST,
319 
320   LD1_MERGE_ZERO,
321   LD1S_MERGE_ZERO,
322   LDNF1_MERGE_ZERO,
323   LDNF1S_MERGE_ZERO,
324   LDFF1_MERGE_ZERO,
325   LDFF1S_MERGE_ZERO,
326   LD1RQ_MERGE_ZERO,
327   LD1RO_MERGE_ZERO,
328 
329   // Structured loads.
330   SVE_LD2_MERGE_ZERO,
331   SVE_LD3_MERGE_ZERO,
332   SVE_LD4_MERGE_ZERO,
333 
334   // Unsigned gather loads.
335   GLD1_MERGE_ZERO,
336   GLD1_SCALED_MERGE_ZERO,
337   GLD1_UXTW_MERGE_ZERO,
338   GLD1_SXTW_MERGE_ZERO,
339   GLD1_UXTW_SCALED_MERGE_ZERO,
340   GLD1_SXTW_SCALED_MERGE_ZERO,
341   GLD1_IMM_MERGE_ZERO,
342 
343   // Signed gather loads
344   GLD1S_MERGE_ZERO,
345   GLD1S_SCALED_MERGE_ZERO,
346   GLD1S_UXTW_MERGE_ZERO,
347   GLD1S_SXTW_MERGE_ZERO,
348   GLD1S_UXTW_SCALED_MERGE_ZERO,
349   GLD1S_SXTW_SCALED_MERGE_ZERO,
350   GLD1S_IMM_MERGE_ZERO,
351 
352   // First-faulting unsigned gather loads.
353   GLDFF1_MERGE_ZERO,
354   GLDFF1_SCALED_MERGE_ZERO,
355   GLDFF1_UXTW_MERGE_ZERO,
356   GLDFF1_SXTW_MERGE_ZERO,
357   GLDFF1_UXTW_SCALED_MERGE_ZERO,
358   GLDFF1_SXTW_SCALED_MERGE_ZERO,
359   GLDFF1_IMM_MERGE_ZERO,
360 
361   // First-faulting signed gather loads.
362   GLDFF1S_MERGE_ZERO,
363   GLDFF1S_SCALED_MERGE_ZERO,
364   GLDFF1S_UXTW_MERGE_ZERO,
365   GLDFF1S_SXTW_MERGE_ZERO,
366   GLDFF1S_UXTW_SCALED_MERGE_ZERO,
367   GLDFF1S_SXTW_SCALED_MERGE_ZERO,
368   GLDFF1S_IMM_MERGE_ZERO,
369 
370   // Non-temporal gather loads
371   GLDNT1_MERGE_ZERO,
372   GLDNT1_INDEX_MERGE_ZERO,
373   GLDNT1S_MERGE_ZERO,
374 
375   // Contiguous masked store.
376   ST1_PRED,
377 
378   // Scatter store
379   SST1_PRED,
380   SST1_SCALED_PRED,
381   SST1_UXTW_PRED,
382   SST1_SXTW_PRED,
383   SST1_UXTW_SCALED_PRED,
384   SST1_SXTW_SCALED_PRED,
385   SST1_IMM_PRED,
386 
387   // Non-temporal scatter store
388   SSTNT1_PRED,
389   SSTNT1_INDEX_PRED,
390 
391   // Strict (exception-raising) floating point comparison
392   STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
393   STRICT_FCMPE,
394 
395   // NEON Load/Store with post-increment base updates
396   LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
397   LD3post,
398   LD4post,
399   ST2post,
400   ST3post,
401   ST4post,
402   LD1x2post,
403   LD1x3post,
404   LD1x4post,
405   ST1x2post,
406   ST1x3post,
407   ST1x4post,
408   LD1DUPpost,
409   LD2DUPpost,
410   LD3DUPpost,
411   LD4DUPpost,
412   LD1LANEpost,
413   LD2LANEpost,
414   LD3LANEpost,
415   LD4LANEpost,
416   ST2LANEpost,
417   ST3LANEpost,
418   ST4LANEpost,
419 
420   STG,
421   STZG,
422   ST2G,
423   STZ2G,
424 
425   LDP,
426   STP,
427   STNP,
428 
429   // Pseudo for an Objective-C call that gets emitted together with a special
430   // `mov x29, x29` marker instruction.
431   CALL_RVMARKER
432 };
433 
434 } // end namespace AArch64ISD
435 
436 namespace {
437 
438 // Any instruction that defines a 32-bit result zeros out the high half of the
439 // register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
440 // be copying from a truncate. But any other 32-bit operation will zero-extend
441 // up to 64 bits. AssertSext/AssertZext aren't saying anything about the upper
442 // 32 bits; they're probably just qualifying a CopyFromReg.
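// For example, a 32-bit "add w0, w1, w2" also zeroes bits [63:32] of x0, so
// its result can be used as a zero-extended 64-bit value without an extra
// UXTW/UBFM.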
443 // FIXME: X86 also checks for CMOV here. Do we need something similar?
444 static inline bool isDef32(const SDNode &N) {
445   unsigned Opc = N.getOpcode();
446   return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
447          Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
448          Opc != ISD::AssertZext;
449 }
450 
451 } // end anonymous namespace
452 
453 class AArch64Subtarget;
454 class AArch64TargetMachine;
455 
456 class AArch64TargetLowering : public TargetLowering {
457 public:
458   explicit AArch64TargetLowering(const TargetMachine &TM,
459                                  const AArch64Subtarget &STI);
460 
461   /// Selects the correct CCAssignFn for a given CallingConvention value.
462   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
463 
464   /// Selects the correct CCAssignFn for a given CallingConvention value.
465   CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;
466 
467   /// Determine which of the bits specified in Mask are known to be either zero
468   /// or one and return them in the KnownZero/KnownOne bitsets.
469   void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
470                                      const APInt &DemandedElts,
471                                      const SelectionDAG &DAG,
472                                      unsigned Depth = 0) const override;
473 
474   MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
475     // Returning i64 unconditionally here (i.e. even for ILP32) means that the
476     // *DAG* representation of pointers will always be 64-bits. They will be
477     // truncated and extended when transferred to memory, but the 64-bit DAG
478     // allows us to use AArch64's addressing modes much more easily.
479     return MVT::getIntegerVT(64);
480   }
481 
482   bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
483                                     const APInt &DemandedElts,
484                                     TargetLoweringOpt &TLO) const override;
485 
486   MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;
487 
488   /// Returns true if the target allows unaligned memory accesses of the
489   /// specified type.
490   bool allowsMisalignedMemoryAccesses(
491       EVT VT, unsigned AddrSpace = 0, unsigned Align = 1,
492       MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
493       bool *Fast = nullptr) const override;
494   /// LLT variant.
495   bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
496                                       Align Alignment,
497                                       MachineMemOperand::Flags Flags,
498                                       bool *Fast = nullptr) const override;
499 
500   /// Provide custom lowering hooks for some operations.
501   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
502 
503   const char *getTargetNodeName(unsigned Opcode) const override;
504 
505   SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
506 
507   /// This method returns a target specific FastISel object, or null if the
508   /// target does not support "fast" ISel.
509   FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
510                            const TargetLibraryInfo *libInfo) const override;
511 
512   bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
513 
514   bool isFPImmLegal(const APFloat &Imm, EVT VT,
515                     bool ForCodeSize) const override;
516 
517   /// Return true if the given shuffle mask can be codegen'd directly, or if it
518   /// should be stack expanded.
519   bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;
520 
521   /// Return the ISD::SETCC ValueType.
522   EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
523                          EVT VT) const override;
524 
525   SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
526 
527   MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
528                                   MachineBasicBlock *BB) const;
529 
530   MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
531                                            MachineBasicBlock *BB) const;
532 
533   MachineBasicBlock *
534   EmitInstrWithCustomInserter(MachineInstr &MI,
535                               MachineBasicBlock *MBB) const override;
536 
537   bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
538                           MachineFunction &MF,
539                           unsigned Intrinsic) const override;
540 
541   bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
542                              EVT NewVT) const override;
543 
544   bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
545   bool isTruncateFree(EVT VT1, EVT VT2) const override;
546 
547   bool isProfitableToHoist(Instruction *I) const override;
548 
549   bool isZExtFree(Type *Ty1, Type *Ty2) const override;
550   bool isZExtFree(EVT VT1, EVT VT2) const override;
551   bool isZExtFree(SDValue Val, EVT VT2) const override;
552 
553   bool shouldSinkOperands(Instruction *I,
554                           SmallVectorImpl<Use *> &Ops) const override;
555 
556   bool hasPairedLoad(EVT LoadedType, Align &RequiredAlignment) const override;
557 
558   unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
559 
560   bool lowerInterleavedLoad(LoadInst *LI,
561                             ArrayRef<ShuffleVectorInst *> Shuffles,
562                             ArrayRef<unsigned> Indices,
563                             unsigned Factor) const override;
564   bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
565                              unsigned Factor) const override;
566 
567   bool isLegalAddImmediate(int64_t) const override;
568   bool isLegalICmpImmediate(int64_t) const override;
569 
570   bool shouldConsiderGEPOffsetSplit() const override;
571 
572   EVT getOptimalMemOpType(const MemOp &Op,
573                           const AttributeList &FuncAttributes) const override;
574 
575   LLT getOptimalMemOpLLT(const MemOp &Op,
576                          const AttributeList &FuncAttributes) const override;
577 
578   /// Return true if the addressing mode represented by AM is legal for this
579   /// target, for a load/store of the specified type.
580   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
581                              unsigned AS,
582                              Instruction *I = nullptr) const override;
583 
584   /// Return the cost of the scaling factor used in the addressing
585   /// mode represented by AM for this target, for a load/store
586   /// of the specified type.
587   /// If the AM is supported, the return value must be >= 0.
588   /// If the AM is not supported, it returns a negative value.
589   int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
590                            unsigned AS) const override;
591 
592   /// Return true if an FMA operation is faster than a pair of fmul and fadd
593   /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
594   /// returns true, otherwise fmuladd is expanded to fmul + fadd.
595   bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
596                                   EVT VT) const override;
597   bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;
598 
599   const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
600 
601   /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
602   bool isDesirableToCommuteWithShift(const SDNode *N,
603                                      CombineLevel Level) const override;
604 
605   /// Returns true if it is beneficial to convert a load of a constant
606   /// to just the constant itself.
607   bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
608                                          Type *Ty) const override;
609 
610   /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
611   /// with this index.
612   bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
613                                unsigned Index) const override;
614 
615   bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
616                             bool MathUsed) const override {
617     // Using overflow ops for overflow checks only should be beneficial on
618     // AArch64.
619     return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
620   }
621 
622   Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
623                         AtomicOrdering Ord) const override;
624   Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
625                               Value *Addr, AtomicOrdering Ord) const override;
626 
627   void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const override;
628 
629   TargetLoweringBase::AtomicExpansionKind
630   shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
631   bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
632   TargetLoweringBase::AtomicExpansionKind
633   shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
634 
635   TargetLoweringBase::AtomicExpansionKind
636   shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
637 
638   bool useLoadStackGuardNode() const override;
639   TargetLoweringBase::LegalizeTypeAction
640   getPreferredVectorAction(MVT VT) const override;
641 
642   /// If the target has a standard location for the stack protector cookie,
643   /// returns the address of that location. Otherwise, returns nullptr.
644   Value *getIRStackGuard(IRBuilder<> &IRB) const override;
645 
646   void insertSSPDeclarations(Module &M) const override;
647   Value *getSDagStackGuard(const Module &M) const override;
648   Function *getSSPStackGuardCheck(const Module &M) const override;
649 
650   /// If the target has a standard location for the unsafe stack pointer,
651   /// returns the address of that location. Otherwise, returns nullptr.
652   Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;
653 
654   /// If a physical register, this returns the register that receives the
655   /// exception address on entry to an EH pad.
656   Register
657   getExceptionPointerRegister(const Constant *PersonalityFn) const override {
658     // FIXME: This is a guess. Has this been defined yet?
659     return AArch64::X0;
660   }
661 
662   /// If a physical register, this returns the register that receives the
663   /// exception typeid on entry to a landing pad.
664   Register
665   getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
666     // FIXME: This is a guess. Has this been defined yet?
667     return AArch64::X1;
668   }
669 
670   bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
671 
672   bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
673                         const SelectionDAG &DAG) const override {
674     // Do not merge stores to a float/vector sized value (wider than 64 bits)
675     // if the NoImplicitFloat attribute is set.
676 
677     bool NoFloat = DAG.getMachineFunction().getFunction().hasFnAttribute(
678         Attribute::NoImplicitFloat);
679 
680     if (NoFloat)
681       return (MemVT.getSizeInBits() <= 64);
682     return true;
683   }
684 
685   bool isCheapToSpeculateCttz() const override {
686     return true;
687   }
688 
689   bool isCheapToSpeculateCtlz() const override {
690     return true;
691   }
692 
693   bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
694 
695   bool hasAndNotCompare(SDValue V) const override {
696     // We can use bics for any scalar.
697     return V.getValueType().isScalarInteger();
698   }
699 
700   bool hasAndNot(SDValue Y) const override {
701     EVT VT = Y.getValueType();
702 
703     if (!VT.isVector())
704       return hasAndNotCompare(Y);
705 
706     return VT.getSizeInBits() >= 64; // vector 'bic'
707   }
708 
709   bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
710       SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
711       unsigned OldShiftOpcode, unsigned NewShiftOpcode,
712       SelectionDAG &DAG) const override;
713 
714   bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;
715 
716   bool shouldTransformSignedTruncationCheck(EVT XVT,
717                                             unsigned KeptBits) const override {
718     // For vectors, we don't have a preference.
719     if (XVT.isVector())
720       return false;
721 
722     auto VTIsOk = [](EVT VT) -> bool {
723       return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
724              VT == MVT::i64;
725     };
726 
727     // We are ok with KeptBitsVT being byte/word/dword, which is what SXT supports.
728     // XVT will be larger than KeptBitsVT.
729     MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
730     return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
731   }
732 
733   bool preferIncOfAddToSubOfNot(EVT VT) const override;
734 
735   bool hasBitPreservingFPLogic(EVT VT) const override {
736     // FIXME: Is this always true? It should be true for vectors at least.
737     return VT == MVT::f32 || VT == MVT::f64;
738   }
739 
740   bool supportSplitCSR(MachineFunction *MF) const override {
741     return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
742            MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
743   }
744   void initializeSplitCSR(MachineBasicBlock *Entry) const override;
745   void insertCopiesSplitCSR(
746       MachineBasicBlock *Entry,
747       const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
748 
749   bool supportSwiftError() const override {
750     return true;
751   }
752 
753   /// Enable aggressive FMA fusion on targets that want it.
754   bool enableAggressiveFMAFusion(EVT VT) const override;
755 
756   /// Returns the size of the platform's va_list object.
757   unsigned getVaListSizeInBits(const DataLayout &DL) const override;
758 
759   /// Returns true if \p VecTy is a legal interleaved access type. This
760   /// function checks the vector element type and the overall width of the
761   /// vector.
762   bool isLegalInterleavedAccessType(VectorType *VecTy,
763                                     const DataLayout &DL) const;
764 
765   /// Returns the number of interleaved accesses that will be generated when
766   /// lowering accesses of the given type.
767   unsigned getNumInterleavedAccesses(VectorType *VecTy,
768                                      const DataLayout &DL) const;
769 
770   MachineMemOperand::Flags getTargetMMOFlags(
771     const Instruction &I) const override;
772 
773   bool functionArgumentNeedsConsecutiveRegisters(Type *Ty,
774                                                  CallingConv::ID CallConv,
775                                                  bool isVarArg) const override;
776   /// Used for exception handling on Win64.
777   bool needsFixedCatchObjects() const override;
778 
779   bool fallBackToDAGISel(const Instruction &Inst) const override;
780 
781   /// SVE code generation for fixed length vectors does not custom lower
782   /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to
783   /// merge. However, merging them creates a BUILD_VECTOR that is just as
784   /// illegal as the original, thus leading to an infinite legalisation loop.
785   /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
786   /// vector types this override can be removed.
787   bool mergeStoresAfterLegalization(EVT VT) const override;
788 
789 private:
790   /// Keep a pointer to the AArch64Subtarget around so that we can
791   /// make the right decision when generating code for different targets.
792   const AArch64Subtarget *Subtarget;
793 
794   bool isExtFreeImpl(const Instruction *Ext) const override;
795 
796   void addTypeForNEON(MVT VT, MVT PromotedBitwiseVT);
797   void addTypeForFixedLengthSVE(MVT VT);
798   void addDRTypeForNEON(MVT VT);
799   void addQRTypeForNEON(MVT VT);
800 
801   SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
802                                bool isVarArg,
803                                const SmallVectorImpl<ISD::InputArg> &Ins,
804                                const SDLoc &DL, SelectionDAG &DAG,
805                                SmallVectorImpl<SDValue> &InVals) const override;
806 
807   SDValue LowerCall(CallLoweringInfo & /*CLI*/,
808                     SmallVectorImpl<SDValue> &InVals) const override;
809 
810   SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
811                           CallingConv::ID CallConv, bool isVarArg,
812                           const SmallVectorImpl<ISD::InputArg> &Ins,
813                           const SDLoc &DL, SelectionDAG &DAG,
814                           SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
815                           SDValue ThisVal) const;
816 
817   SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
818   SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
819 
820   SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
821   SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;
822 
823   SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
824 
825   bool isEligibleForTailCallOptimization(
826       SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
827       const SmallVectorImpl<ISD::OutputArg> &Outs,
828       const SmallVectorImpl<SDValue> &OutVals,
829       const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
830 
831   /// Finds the incoming stack arguments which overlap the given fixed stack
832   /// object and incorporates their load into the current chain. This prevents
833   /// an upcoming store from clobbering the stack argument before it's used.
834   SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
835                               MachineFrameInfo &MFI, int ClobberedFI) const;
836 
837   bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;
838 
839   void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
840                            SDValue &Chain) const;
841 
842   bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
843                       bool isVarArg,
844                       const SmallVectorImpl<ISD::OutputArg> &Outs,
845                       LLVMContext &Context) const override;
846 
847   SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
848                       const SmallVectorImpl<ISD::OutputArg> &Outs,
849                       const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
850                       SelectionDAG &DAG) const override;
851 
852   SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
853                         unsigned Flag) const;
854   SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
855                         unsigned Flag) const;
856   SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
857                         unsigned Flag) const;
858   SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
859                         unsigned Flag) const;
860   template <class NodeTy>
861   SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
862   template <class NodeTy>
863   SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
864   template <class NodeTy>
865   SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
866   template <class NodeTy>
867   SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
868   SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
869   SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
870   SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
871   SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
872   SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
873   SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase,
874                                const SDLoc &DL, SelectionDAG &DAG) const;
875   SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
876                                  SelectionDAG &DAG) const;
877   SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
878   SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
879   SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
880   SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
881   SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
882   SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
883                          SDValue TVal, SDValue FVal, const SDLoc &dl,
884                          SelectionDAG &DAG) const;
885   SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
886   SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
887   SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
888   SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
889   SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
890   SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
891   SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
892   SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
893   SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
894   SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
895   SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
896   SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
897   SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
898   SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
899   SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
900   SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
901   SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
902   SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
903   SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
904   SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
905   SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
906   SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, unsigned NewOp,
907                               bool OverrideNEON = false) const;
908   SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
909   SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
910   SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
911   SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
912   SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
913   SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
914   SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
915   SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
916   SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
917   SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
918   SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
919   SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
920   SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
921   SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
922   SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
923   SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
924   SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
925   SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
926   SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
927   SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
928   SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
929   SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
930   SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
931   SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
932   SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
933   SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
934   SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
935   SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
936   SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain,
937                                          SDValue &Size,
938                                          SelectionDAG &DAG) const;
939   SDValue LowerSVEStructLoad(unsigned Intrinsic, ArrayRef<SDValue> LoadOps,
940                              EVT VT, SelectionDAG &DAG, const SDLoc &DL) const;
941 
942   SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
943                                                SelectionDAG &DAG) const;
944   SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
945                                                SelectionDAG &DAG) const;
946   SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
947   SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
948   SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
949   SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
950                               SelectionDAG &DAG) const;
951   SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;
952   SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;
953   SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
954   SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
955                                               SelectionDAG &DAG) const;
956 
957   SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
958                         SmallVectorImpl<SDNode *> &Created) const override;
959   SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
960                           int &ExtraSteps, bool &UseOneConst,
961                           bool Reciprocal) const override;
962   SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
963                            int &ExtraSteps) const override;
964   SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
965                            const DenormalMode &Mode) const override;
966   SDValue getSqrtResultForDenormInput(SDValue Operand,
967                                       SelectionDAG &DAG) const override;
968   unsigned combineRepeatedFPDivisors() const override;
969 
970   ConstraintType getConstraintType(StringRef Constraint) const override;
971   Register getRegisterByName(const char* RegName, LLT VT,
972                              const MachineFunction &MF) const override;
973 
974   /// Examine constraint string and operand type and determine a weight value.
975   /// The operand object must already have been set up with the operand type.
976   ConstraintWeight
977   getSingleConstraintMatchWeight(AsmOperandInfo &info,
978                                  const char *constraint) const override;
979 
980   std::pair<unsigned, const TargetRegisterClass *>
981   getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
982                                StringRef Constraint, MVT VT) const override;
983 
984   const char *LowerXConstraint(EVT ConstraintVT) const override;
985 
986   void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
987                                     std::vector<SDValue> &Ops,
988                                     SelectionDAG &DAG) const override;
989 
990   unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
991     if (ConstraintCode == "Q")
992       return InlineAsm::Constraint_Q;
993     // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
994     //        followed by llvm_unreachable so we'll leave them unimplemented in
995     //        the backend for now.
996     return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
997   }
998 
999   bool shouldRemoveExtendFromGSIndex(EVT VT) const override;
1000   bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
1001   bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1002   bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1003   bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
1004                               ISD::MemIndexedMode &AM, bool &IsInc,
1005                               SelectionDAG &DAG) const;
1006   bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
1007                                  ISD::MemIndexedMode &AM,
1008                                  SelectionDAG &DAG) const override;
1009   bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
1010                                   SDValue &Offset, ISD::MemIndexedMode &AM,
1011                                   SelectionDAG &DAG) const override;
1012 
1013   void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
1014                           SelectionDAG &DAG) const override;
1015   void ReplaceExtractSubVectorResults(SDNode *N,
1016                                       SmallVectorImpl<SDValue> &Results,
1017                                       SelectionDAG &DAG) const;
1018 
1019   bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;
1020 
1021   void finalizeLowering(MachineFunction &MF) const override;
1022 
1023   bool shouldLocalize(const MachineInstr &MI,
1024                       const TargetTransformInfo *TTI) const override;
1025 
1026   // Normally SVE is only used for fixed length vectors that do not fit within
1027   // a NEON vector. This changes when OverrideNEON is true, allowing SVE to be
1028   // used for 64-bit and 128-bit vectors as well.
1029   bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;
1030 
1031   // With the exception of data-predicate transitions, no instructions are
1032   // required to cast between legal scalable vector types. However:
1033   //  1. Packed and unpacked types have different bit lengths, meaning BITCAST
1034   //     is not universally usable.
1035   //  2. Most unpacked integer types are not legal and thus integer extends
1036   //     cannot be used to convert between unpacked and packed types.
1037   // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used
1038   // to transition between unpacked and packed types of the same element type,
1039   // with BITCAST used otherwise.
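  // A hedged illustration: two legal scalable types with the same element type
  // but different packing (say, unpacked nxv2f32 and packed nxv4f32) would use
  // REINTERPRET_CAST, while a same-sized change of element type (say, nxv4f32
  // to nxv2f64) would use BITCAST.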
1040   SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;
1041 };
1042 
1043 namespace AArch64 {
1044 FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1045                          const TargetLibraryInfo *libInfo);
1046 } // end namespace AArch64
1047 
1048 } // end namespace llvm
1049 
1050 #endif
1051