1 //==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that AArch64 uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
15 #define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
16
17 #include "AArch64.h"
18 #include "llvm/CodeGen/CallingConvLower.h"
19 #include "llvm/CodeGen/MachineFunction.h"
20 #include "llvm/CodeGen/SelectionDAG.h"
21 #include "llvm/CodeGen/TargetLowering.h"
22 #include "llvm/IR/CallingConv.h"
23 #include "llvm/IR/Instruction.h"
24
25 namespace llvm {
26
27 namespace AArch64ISD {
28
29 // For predicated nodes where the result is a vector, the operation is
30 // controlled by a governing predicate and the inactive lanes are explicitly
// defined with a value, please stick to the following naming convention:
32 //
33 // _MERGE_OP<n> The result value is a vector with inactive lanes equal
34 // to source operand OP<n>.
35 //
36 // _MERGE_ZERO The result value is a vector with inactive lanes
37 // actively zeroed.
38 //
// _MERGE_PASSTHRU The result value is a vector with inactive lanes equal
//                 to the last source operand, whose only purpose is to be
//                 a passthru value.
42 //
43 // For other cases where no explicit action is needed to set the inactive lanes,
44 // or when the result is not a vector and it is needed or helpful to
45 // distinguish a node from similar unpredicated nodes, use:
46 //
47 // _PRED
48 //
// AArch64-specific SelectionDAG node opcodes. Enumerator order is
// significant: values are assigned sequentially from the anchors below, so
// do not reorder entries.
enum NodeType : unsigned {
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
  CALL,         // Function call.

  // Pseudo for an ObjC call that gets emitted together with a special `mov
  // x29, x29` marker instruction.
  CALL_RVMARKER,

  // Produces the full sequence of instructions for getting the thread pointer
  // offset of a variable into X0, using the TLSDesc model.
  TLSDESC_CALLSEQ,
  ADRP,     // Page address of a TargetGlobalAddress operand.
  ADR,      // ADR
  ADDlow,   // Add the low 12 bits of a TargetGlobalAddress operand.
  LOADgot,  // Load from automatically generated descriptor (e.g. Global
            // Offset Table, TLS record).
  RET_FLAG, // Return with a flag operand. Operand 0 is the chain operand.
  BRCOND,   // Conditional branch instruction; "b.cond".
  CSEL,
  CSINV, // Conditional select invert.
  CSNEG, // Conditional select negate.
  CSINC, // Conditional select increment.

  // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
  // ELF.
  THREAD_POINTER,
  ADC,
  SBC, // adc, sbc instructions

  // Predicated instructions where inactive lanes produce undefined results.
  ADD_PRED,
  FADD_PRED,
  FDIV_PRED,
  FMA_PRED,
  FMAXNM_PRED,
  FMINNM_PRED,
  FMAX_PRED,
  FMIN_PRED,
  FMUL_PRED,
  FSUB_PRED,
  MUL_PRED,
  MULHS_PRED,
  MULHU_PRED,
  SDIV_PRED,
  SHL_PRED,
  SMAX_PRED,
  SMIN_PRED,
  SRA_PRED,
  SRL_PRED,
  SUB_PRED,
  UDIV_PRED,
  UMAX_PRED,
  UMIN_PRED,

  // Unpredicated vector instructions
  BIC,

  // Predicated instructions with the result of inactive lanes provided by the
  // last operand.
  FABS_MERGE_PASSTHRU,
  FCEIL_MERGE_PASSTHRU,
  FFLOOR_MERGE_PASSTHRU,
  FNEARBYINT_MERGE_PASSTHRU,
  FNEG_MERGE_PASSTHRU,
  FRECPX_MERGE_PASSTHRU,
  FRINT_MERGE_PASSTHRU,
  FROUND_MERGE_PASSTHRU,
  FROUNDEVEN_MERGE_PASSTHRU,
  FSQRT_MERGE_PASSTHRU,
  FTRUNC_MERGE_PASSTHRU,
  FP_ROUND_MERGE_PASSTHRU,
  FP_EXTEND_MERGE_PASSTHRU,
  UINT_TO_FP_MERGE_PASSTHRU,
  SINT_TO_FP_MERGE_PASSTHRU,
  FCVTZU_MERGE_PASSTHRU,
  FCVTZS_MERGE_PASSTHRU,
  SIGN_EXTEND_INREG_MERGE_PASSTHRU,
  ZERO_EXTEND_INREG_MERGE_PASSTHRU,
  ABS_MERGE_PASSTHRU,
  NEG_MERGE_PASSTHRU,

  SETCC_MERGE_ZERO,

  // Arithmetic instructions which write flags.
  ADDS,
  SUBS,
  ADCS,
  SBCS,
  ANDS,

  // Conditional compares. Operands: left,right,falsecc,cc,flags
  CCMP,
  CCMN,
  FCCMP,

  // Floating point comparison
  FCMP,

  // Scalar extract
  EXTR,

  // Scalar-to-vector duplication
  DUP,
  DUPLANE8,
  DUPLANE16,
  DUPLANE32,
  DUPLANE64,

  // Vector immediate moves
  MOVI,
  MOVIshift,
  MOVIedit,
  MOVImsl,
  FMOV,
  MVNIshift,
  MVNImsl,

  // Vector immediate ops
  BICi,
  ORRi,

  // Vector bitwise select: similar to ISD::VSELECT but not all bits within an
  // element must be identical.
  BSP,

  // Vector shuffles
  ZIP1,
  ZIP2,
  UZP1,
  UZP2,
  TRN1,
  TRN2,
  REV16,
  REV32,
  REV64,
  EXT,
  SPLICE,

  // Vector shift by scalar
  VSHL,
  VLSHR,
  VASHR,

  // Vector shift by immediate, with saturation/rounding variants
  SQSHL_I,
  UQSHL_I,
  SQSHLU_I,
  SRSHR_I,
  URSHR_I,

  // Vector shift by constant and insert
  VSLI,
  VSRI,

  // Vector comparisons
  CMEQ,
  CMGE,
  CMGT,
  CMHI,
  CMHS,
  FCMEQ,
  FCMGE,
  FCMGT,

  // Vector zero comparisons
  CMEQz,
  CMGEz,
  CMGTz,
  CMLEz,
  CMLTz,
  FCMEQz,
  FCMGEz,
  FCMGTz,
  FCMLEz,
  FCMLTz,

  // Vector across-lanes addition
  // Only the lower result lane is defined.
  SADDV,
  UADDV,

  // Vector halving addition
  SHADD,
  UHADD,

  // Vector rounding halving addition
  SRHADD,
  URHADD,

  // Unsigned Add Long Pairwise
  UADDLP,

  // udot/sdot instructions
  UDOT,
  SDOT,

  // Vector across-lanes min/max
  // Only the lower result lane is defined.
  SMINV,
  UMINV,
  SMAXV,
  UMAXV,

  // Predicated (SVE) across-lanes reductions.
  SADDV_PRED,
  UADDV_PRED,
  SMAXV_PRED,
  UMAXV_PRED,
  SMINV_PRED,
  UMINV_PRED,
  ORV_PRED,
  EORV_PRED,
  ANDV_PRED,

  // Vector bitwise insertion
  BIT,

  // Compare-and-branch
  CBZ,
  CBNZ,
  TBZ,
  TBNZ,

  // Tail calls
  TC_RETURN,

  // Custom prefetch handling
  PREFETCH,

  // {s|u}int to FP within a FP register.
  SITOF,
  UITOF,

  /// Natural vector cast. ISD::BITCAST is not natural in the big-endian
  /// world w.r.t vectors; which causes additional REV instructions to be
  /// generated to compensate for the byte-swapping. But sometimes we do
  /// need to re-interpret the data in SIMD vector registers in big-endian
  /// mode without emitting such REV instructions.
  NVCAST,

  MRS, // MRS, also sets the flags via a glue.

  SMULL,
  UMULL,

  // Reciprocal estimates and steps.
  FRECPE,
  FRECPS,
  FRSQRTE,
  FRSQRTS,

  SUNPKHI,
  SUNPKLO,
  UUNPKHI,
  UUNPKLO,

  CLASTA_N,
  CLASTB_N,
  LASTA,
  LASTB,
  TBL,

  // Floating-point reductions.
  FADDA_PRED,
  FADDV_PRED,
  FMAXV_PRED,
  FMAXNMV_PRED,
  FMINV_PRED,
  FMINNMV_PRED,

  INSR,
  PTEST,
  PTRUE,

  BITREVERSE_MERGE_PASSTHRU,
  BSWAP_MERGE_PASSTHRU,
  CTLZ_MERGE_PASSTHRU,
  CTPOP_MERGE_PASSTHRU,
  DUP_MERGE_PASSTHRU,
  INDEX_VECTOR,

  // Cast between vectors of the same element type but differ in length.
  REINTERPRET_CAST,

  // Nodes to build an LD64B / ST64B 64-bit quantity out of i64, and vice versa
  LS64_BUILD,
  LS64_EXTRACT,

  // Contiguous masked loads (inactive lanes zeroed).
  LD1_MERGE_ZERO,
  LD1S_MERGE_ZERO,
  LDNF1_MERGE_ZERO,
  LDNF1S_MERGE_ZERO,
  LDFF1_MERGE_ZERO,
  LDFF1S_MERGE_ZERO,
  LD1RQ_MERGE_ZERO,
  LD1RO_MERGE_ZERO,

  // Structured loads.
  SVE_LD2_MERGE_ZERO,
  SVE_LD3_MERGE_ZERO,
  SVE_LD4_MERGE_ZERO,

  // Unsigned gather loads.
  GLD1_MERGE_ZERO,
  GLD1_SCALED_MERGE_ZERO,
  GLD1_UXTW_MERGE_ZERO,
  GLD1_SXTW_MERGE_ZERO,
  GLD1_UXTW_SCALED_MERGE_ZERO,
  GLD1_SXTW_SCALED_MERGE_ZERO,
  GLD1_IMM_MERGE_ZERO,

  // Signed gather loads
  GLD1S_MERGE_ZERO,
  GLD1S_SCALED_MERGE_ZERO,
  GLD1S_UXTW_MERGE_ZERO,
  GLD1S_SXTW_MERGE_ZERO,
  GLD1S_UXTW_SCALED_MERGE_ZERO,
  GLD1S_SXTW_SCALED_MERGE_ZERO,
  GLD1S_IMM_MERGE_ZERO,

  // Unsigned gather loads (first-faulting).
  GLDFF1_MERGE_ZERO,
  GLDFF1_SCALED_MERGE_ZERO,
  GLDFF1_UXTW_MERGE_ZERO,
  GLDFF1_SXTW_MERGE_ZERO,
  GLDFF1_UXTW_SCALED_MERGE_ZERO,
  GLDFF1_SXTW_SCALED_MERGE_ZERO,
  GLDFF1_IMM_MERGE_ZERO,

  // Signed gather loads (first-faulting).
  GLDFF1S_MERGE_ZERO,
  GLDFF1S_SCALED_MERGE_ZERO,
  GLDFF1S_UXTW_MERGE_ZERO,
  GLDFF1S_SXTW_MERGE_ZERO,
  GLDFF1S_UXTW_SCALED_MERGE_ZERO,
  GLDFF1S_SXTW_SCALED_MERGE_ZERO,
  GLDFF1S_IMM_MERGE_ZERO,

  // Non-temporal gather loads
  GLDNT1_MERGE_ZERO,
  GLDNT1_INDEX_MERGE_ZERO,
  GLDNT1S_MERGE_ZERO,

  // Contiguous masked store.
  ST1_PRED,

  // Scatter store
  SST1_PRED,
  SST1_SCALED_PRED,
  SST1_UXTW_PRED,
  SST1_SXTW_PRED,
  SST1_UXTW_SCALED_PRED,
  SST1_SXTW_SCALED_PRED,
  SST1_IMM_PRED,

  // Non-temporal scatter store
  SSTNT1_PRED,
  SSTNT1_INDEX_PRED,

  // Strict (exception-raising) floating point comparison
  STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
  STRICT_FCMPE,

  // NEON Load/Store with post-increment base updates
  LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
  LD3post,
  LD4post,
  ST2post,
  ST3post,
  ST4post,
  LD1x2post,
  LD1x3post,
  LD1x4post,
  ST1x2post,
  ST1x3post,
  ST1x4post,
  LD1DUPpost,
  LD2DUPpost,
  LD3DUPpost,
  LD4DUPpost,
  LD1LANEpost,
  LD2LANEpost,
  LD3LANEpost,
  LD4LANEpost,
  ST2LANEpost,
  ST3LANEpost,
  ST4LANEpost,

  // Memory-tagging tag-store instructions (STG and variants).
  STG,
  STZG,
  ST2G,
  STZ2G,

  // Load/store pair (and non-temporal store pair) instructions.
  LDP,
  STP,
  STNP,
};
446
447 } // end namespace AArch64ISD
448
449 namespace {
450
451 // Any instruction that defines a 32-bit result zeros out the high half of the
452 // register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
453 // be copying from a truncate. But any other 32-bit operation will zero-extend
454 // up to 64 bits. AssertSext/AssertZext aren't saying anything about the upper
455 // 32 bits, they're probably just qualifying a CopyFromReg.
static inline bool isDef32(const SDNode &N) {
  unsigned Opc = N.getOpcode();
  // Exclude nodes that merely forward or annotate a value produced elsewhere:
  // TRUNCATE/EXTRACT_SUBREG/CopyFromReg/FREEZE pass a value through, and the
  // Assert* nodes only qualify one, so none of them guarantee that the upper
  // 32 bits of the 64-bit register are zeroed.
  return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
         Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
         Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
         Opc != ISD::FREEZE;
}
463
464 } // end anonymous namespace
465
namespace AArch64 {
/// Possible values of current rounding mode, which is specified in bits
/// 23:22 of FPCR (the encoding matches the hardware RMode field).
enum Rounding {
  RN = 0,    // Round to Nearest
  RP = 1,    // Round towards Plus infinity
  RM = 2,    // Round towards Minus infinity
  RZ = 3,    // Round towards Zero
  rmMask = 3 // Bit mask selecting rounding mode
};

// Bit position of rounding mode bits in FPCR.
const unsigned RoundingBitsPos = 22;
} // namespace AArch64
480
481 class AArch64Subtarget;
482 class AArch64TargetMachine;
483
484 class AArch64TargetLowering : public TargetLowering {
485 public:
486 explicit AArch64TargetLowering(const TargetMachine &TM,
487 const AArch64Subtarget &STI);
488
489 /// Selects the correct CCAssignFn for a given CallingConvention value.
490 CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
491
492 /// Selects the correct CCAssignFn for a given CallingConvention value.
493 CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;
494
495 /// Determine which of the bits specified in Mask are known to be either zero
496 /// or one and return them in the KnownZero/KnownOne bitsets.
497 void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
498 const APInt &DemandedElts,
499 const SelectionDAG &DAG,
500 unsigned Depth = 0) const override;
501
502 MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
503 // Returning i64 unconditionally here (i.e. even for ILP32) means that the
504 // *DAG* representation of pointers will always be 64-bits. They will be
505 // truncated and extended when transferred to memory, but the 64-bit DAG
506 // allows us to use AArch64's addressing modes much more easily.
507 return MVT::getIntegerVT(64);
508 }
509
510 bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
511 const APInt &DemandedElts,
512 TargetLoweringOpt &TLO) const override;
513
514 MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;
515
516 /// Returns true if the target allows unaligned memory accesses of the
517 /// specified type.
518 bool allowsMisalignedMemoryAccesses(
519 EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
520 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
521 bool *Fast = nullptr) const override;
522 /// LLT variant.
523 bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
524 Align Alignment,
525 MachineMemOperand::Flags Flags,
526 bool *Fast = nullptr) const override;
527
528 /// Provide custom lowering hooks for some operations.
529 SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
530
531 const char *getTargetNodeName(unsigned Opcode) const override;
532
533 SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
534
535 /// This method returns a target specific FastISel object, or null if the
536 /// target does not support "fast" ISel.
537 FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
538 const TargetLibraryInfo *libInfo) const override;
539
540 bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
541
542 bool isFPImmLegal(const APFloat &Imm, EVT VT,
543 bool ForCodeSize) const override;
544
545 /// Return true if the given shuffle mask can be codegen'd directly, or if it
546 /// should be stack expanded.
547 bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;
548
549 /// Return the ISD::SETCC ValueType.
550 EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
551 EVT VT) const override;
552
553 SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
554
555 MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
556 MachineBasicBlock *BB) const;
557
558 MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
559 MachineBasicBlock *BB) const;
560
561 MachineBasicBlock *
562 EmitInstrWithCustomInserter(MachineInstr &MI,
563 MachineBasicBlock *MBB) const override;
564
565 bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
566 MachineFunction &MF,
567 unsigned Intrinsic) const override;
568
569 bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
570 EVT NewVT) const override;
571
572 bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
573 bool isTruncateFree(EVT VT1, EVT VT2) const override;
574
575 bool isProfitableToHoist(Instruction *I) const override;
576
577 bool isZExtFree(Type *Ty1, Type *Ty2) const override;
578 bool isZExtFree(EVT VT1, EVT VT2) const override;
579 bool isZExtFree(SDValue Val, EVT VT2) const override;
580
581 bool shouldSinkOperands(Instruction *I,
582 SmallVectorImpl<Use *> &Ops) const override;
583
584 bool hasPairedLoad(EVT LoadedType, Align &RequiredAligment) const override;
585
  /// Maximum interleave factor for structured accesses (ld2/ld3/ld4 family —
  /// see lowerInterleavedLoad/Store below).
  unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
587
588 bool lowerInterleavedLoad(LoadInst *LI,
589 ArrayRef<ShuffleVectorInst *> Shuffles,
590 ArrayRef<unsigned> Indices,
591 unsigned Factor) const override;
592 bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
593 unsigned Factor) const override;
594
595 bool isLegalAddImmediate(int64_t) const override;
596 bool isLegalICmpImmediate(int64_t) const override;
597
598 bool shouldConsiderGEPOffsetSplit() const override;
599
600 EVT getOptimalMemOpType(const MemOp &Op,
601 const AttributeList &FuncAttributes) const override;
602
603 LLT getOptimalMemOpLLT(const MemOp &Op,
604 const AttributeList &FuncAttributes) const override;
605
606 /// Return true if the addressing mode represented by AM is legal for this
607 /// target, for a load/store of the specified type.
608 bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
609 unsigned AS,
610 Instruction *I = nullptr) const override;
611
612 /// Return the cost of the scaling factor used in the addressing
613 /// mode represented by AM for this target, for a load/store
614 /// of the specified type.
615 /// If the AM is supported, the return value must be >= 0.
616 /// If the AM is not supported, it returns a negative value.
617 InstructionCost getScalingFactorCost(const DataLayout &DL, const AddrMode &AM,
618 Type *Ty, unsigned AS) const override;
619
620 /// Return true if an FMA operation is faster than a pair of fmul and fadd
621 /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
622 /// returns true, otherwise fmuladd is expanded to fmul + fadd.
623 bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
624 EVT VT) const override;
625 bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;
626
627 bool generateFMAsInMachineCombiner(EVT VT,
628 CodeGenOpt::Level OptLevel) const override;
629
630 const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
631
632 /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
633 bool isDesirableToCommuteWithShift(const SDNode *N,
634 CombineLevel Level) const override;
635
636 /// Returns true if it is beneficial to convert a load of a constant
637 /// to just the constant itself.
638 bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
639 Type *Ty) const override;
640
641 /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
642 /// with this index.
643 bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
644 unsigned Index) const override;
645
  bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                            bool MathUsed) const override {
    // Using overflow ops for overflow checks only should be beneficial on
    // AArch64.
    // NOTE(review): MathUsed is deliberately replaced with 'true' below, so
    // the base-class heuristic always behaves as if the math result is used —
    // confirm this override of the caller's flag is still intended.
    return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
  }
652
653 Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
654 AtomicOrdering Ord) const override;
655 Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
656 AtomicOrdering Ord) const override;
657
658 void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override;
659
660 TargetLoweringBase::AtomicExpansionKind
661 shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
662 bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
663 TargetLoweringBase::AtomicExpansionKind
664 shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
665
666 TargetLoweringBase::AtomicExpansionKind
667 shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
668
669 bool useLoadStackGuardNode() const override;
670 TargetLoweringBase::LegalizeTypeAction
671 getPreferredVectorAction(MVT VT) const override;
672
673 /// If the target has a standard location for the stack protector cookie,
674 /// returns the address of that location. Otherwise, returns nullptr.
675 Value *getIRStackGuard(IRBuilderBase &IRB) const override;
676
677 void insertSSPDeclarations(Module &M) const override;
678 Value *getSDagStackGuard(const Module &M) const override;
679 Function *getSSPStackGuardCheck(const Module &M) const override;
680
681 /// If the target has a standard location for the unsafe stack pointer,
682 /// returns the address of that location. Otherwise, returns nullptr.
683 Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;
684
685 /// If a physical register, this returns the register that receives the
686 /// exception address on entry to an EH pad.
  Register
  getExceptionPointerRegister(const Constant *PersonalityFn) const override {
    // X0 receives the exception address on entry to an EH pad.
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X0;
  }
692
  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  Register
  getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
    // X1 receives the exception selector (typeid) on entry to a landing pad.
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X1;
  }
700
701 bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
702
canMergeStoresTo(unsigned AddressSpace,EVT MemVT,const SelectionDAG & DAG)703 bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
704 const SelectionDAG &DAG) const override {
705 // Do not merge to float value size (128 bytes) if no implicit
706 // float attribute is set.
707
708 bool NoFloat = DAG.getMachineFunction().getFunction().hasFnAttribute(
709 Attribute::NoImplicitFloat);
710
711 if (NoFloat)
712 return (MemVT.getSizeInBits() <= 64);
713 return true;
714 }
715
  /// Count-trailing-zeros is cheap enough on AArch64 to speculate
  /// unconditionally (presumably a short branchless sequence — see the
  /// CTTZ lowering).
  bool isCheapToSpeculateCttz() const override {
    return true;
  }
719
  /// Count-leading-zeros is a single instruction (clz) on AArch64, so
  /// speculating it is always fine.
  bool isCheapToSpeculateCtlz() const override {
    return true;
  }
723
724 bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
725
  /// Return true if a flag-setting and-not ((X & ~Y) compared against zero)
  /// is available for this value type.
  bool hasAndNotCompare(SDValue V) const override {
    // We can use bics for any scalar.
    return V.getValueType().isScalarInteger();
  }
730
hasAndNot(SDValue Y)731 bool hasAndNot(SDValue Y) const override {
732 EVT VT = Y.getValueType();
733
734 if (!VT.isVector())
735 return hasAndNotCompare(Y);
736
737 return VT.getSizeInBits() >= 64; // vector 'bic'
738 }
739
740 bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
741 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
742 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
743 SelectionDAG &DAG) const override;
744
745 bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;
746
shouldTransformSignedTruncationCheck(EVT XVT,unsigned KeptBits)747 bool shouldTransformSignedTruncationCheck(EVT XVT,
748 unsigned KeptBits) const override {
749 // For vectors, we don't have a preference..
750 if (XVT.isVector())
751 return false;
752
753 auto VTIsOk = [](EVT VT) -> bool {
754 return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
755 VT == MVT::i64;
756 };
757
758 // We are ok with KeptBitsVT being byte/word/dword, what SXT supports.
759 // XVT will be larger than KeptBitsVT.
760 MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
761 return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
762 }
763
764 bool preferIncOfAddToSubOfNot(EVT VT) const override;
765
  /// Return true if bitwise logic on this FP type preserves the bit pattern
  /// (so FP values can be passed through integer logic nodes safely).
  bool hasBitPreservingFPLogic(EVT VT) const override {
    // FIXME: Is this always true? It should be true for vectors at least.
    return VT == MVT::f32 || VT == MVT::f64;
  }
770
supportSplitCSR(MachineFunction * MF)771 bool supportSplitCSR(MachineFunction *MF) const override {
772 return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
773 MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
774 }
775 void initializeSplitCSR(MachineBasicBlock *Entry) const override;
776 void insertCopiesSplitCSR(
777 MachineBasicBlock *Entry,
778 const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
779
  /// AArch64 supports the swifterror attribute (the error value is carried
  /// in a dedicated register across calls).
  bool supportSwiftError() const override {
    return true;
  }
783
784 /// Enable aggressive FMA fusion on targets that want it.
785 bool enableAggressiveFMAFusion(EVT VT) const override;
786
787 /// Returns the size of the platform's va_list object.
788 unsigned getVaListSizeInBits(const DataLayout &DL) const override;
789
790 /// Returns true if \p VecTy is a legal interleaved access type. This
791 /// function checks the vector element type and the overall width of the
792 /// vector.
793 bool isLegalInterleavedAccessType(VectorType *VecTy,
794 const DataLayout &DL) const;
795
796 /// Returns the number of interleaved accesses that will be generated when
797 /// lowering accesses of the given type.
798 unsigned getNumInterleavedAccesses(VectorType *VecTy,
799 const DataLayout &DL) const;
800
801 MachineMemOperand::Flags getTargetMMOFlags(
802 const Instruction &I) const override;
803
804 bool functionArgumentNeedsConsecutiveRegisters(
805 Type *Ty, CallingConv::ID CallConv, bool isVarArg,
806 const DataLayout &DL) const override;
807
808 /// Used for exception handling on Win64.
809 bool needsFixedCatchObjects() const override;
810
811 bool fallBackToDAGISel(const Instruction &Inst) const override;
812
813 /// SVE code generation for fixed length vectors does not custom lower
814 /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to
815 /// merge. However, merging them creates a BUILD_VECTOR that is just as
816 /// illegal as the original, thus leading to an infinite legalisation loop.
817 /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
818 /// vector types this override can be removed.
819 bool mergeStoresAfterLegalization(EVT VT) const override;
820
821 // If the platform/function should have a redzone, return the size in bytes.
getRedZoneSize(const Function & F)822 unsigned getRedZoneSize(const Function &F) const {
823 if (F.hasFnAttribute(Attribute::NoRedZone))
824 return 0;
825 return 128;
826 }
827
828 bool isAllActivePredicate(SDValue N) const;
829 EVT getPromotedVTForPredicate(EVT VT) const;
830
831 EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
832 bool AllowUnknown = false) const override;
833
834 private:
835 /// Keep a pointer to the AArch64Subtarget around so that we can
836 /// make the right decision when generating code for different targets.
837 const AArch64Subtarget *Subtarget;
838
839 bool isExtFreeImpl(const Instruction *Ext) const override;
840
841 void addTypeForNEON(MVT VT);
842 void addTypeForFixedLengthSVE(MVT VT);
843 void addDRTypeForNEON(MVT VT);
844 void addQRTypeForNEON(MVT VT);
845
846 SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
847 bool isVarArg,
848 const SmallVectorImpl<ISD::InputArg> &Ins,
849 const SDLoc &DL, SelectionDAG &DAG,
850 SmallVectorImpl<SDValue> &InVals) const override;
851
852 SDValue LowerCall(CallLoweringInfo & /*CLI*/,
853 SmallVectorImpl<SDValue> &InVals) const override;
854
855 SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
856 CallingConv::ID CallConv, bool isVarArg,
857 const SmallVectorImpl<ISD::InputArg> &Ins,
858 const SDLoc &DL, SelectionDAG &DAG,
859 SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
860 SDValue ThisVal) const;
861
862 SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
863 SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
864 SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
865
866 SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
867 SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;
868
869 SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const;
870
871 SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
872
873 bool isEligibleForTailCallOptimization(
874 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
875 const SmallVectorImpl<ISD::OutputArg> &Outs,
876 const SmallVectorImpl<SDValue> &OutVals,
877 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
878
879 /// Finds the incoming stack arguments which overlap the given fixed stack
880 /// object and incorporates their load into the current chain. This prevents
881 /// an upcoming store from clobbering the stack argument before it's used.
882 SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
883 MachineFrameInfo &MFI, int ClobberedFI) const;
884
885 bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;
886
887 void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
888 SDValue &Chain) const;
889
890 bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
891 bool isVarArg,
892 const SmallVectorImpl<ISD::OutputArg> &Outs,
893 LLVMContext &Context) const override;
894
895 SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
896 const SmallVectorImpl<ISD::OutputArg> &Outs,
897 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
898 SelectionDAG &DAG) const override;
899
900 SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
901 unsigned Flag) const;
902 SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
903 unsigned Flag) const;
904 SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
905 unsigned Flag) const;
906 SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
907 unsigned Flag) const;
908 template <class NodeTy>
909 SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
910 template <class NodeTy>
911 SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
912 template <class NodeTy>
913 SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
914 template <class NodeTy>
915 SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
916 SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
917 SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
918 SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
919 SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
920 SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
921 SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase,
922 const SDLoc &DL, SelectionDAG &DAG) const;
923 SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
924 SelectionDAG &DAG) const;
  // Custom lowering hooks: one entry point per ISD node that AArch64 handles
  // specially. Each receives the node to lower and returns the replacement
  // DAG value (an empty SDValue means "use default expansion").
  SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
  // Overload taking the condition and operands already unpacked from a node.
  SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
                         SDValue TVal, SDValue FVal, const SDLoc &dl,
                         SelectionDAG &DAG) const;
  SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
  // va_start lowering: ABI-specific variants (AAPCS / Darwin / Win64) are
  // declared alongside the generic VASTART entry point.
  SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  // Vector construction / element access.
  SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
  // Rewrites Op as the equivalent predicated node NewOp. When OverrideNEON is
  // true this path is also taken for vector types NEON could otherwise handle
  // (see useSVEForFixedLengthVectorVT).
  SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, unsigned NewOp,
                              bool OverrideNEON = false) const;
  SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  // Arithmetic and shifts.
  SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const;
  // Floating point and conversions.
  SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  // Windows variant; Size is in-out (passed by reference).
  SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain,
                                         SDValue &Size,
                                         SelectionDAG &DAG) const;
  // Lowers an SVE structured-load intrinsic; LoadOps carries the intrinsic's
  // operands and VT is the result vector type.
  SDValue LowerSVEStructLoad(unsigned Intrinsic, ArrayRef<SDValue> LoadOps,
                             EVT VT, SelectionDAG &DAG, const SDLoc &DL) const;
993
  // Lowering of fixed-length vector operations onto SVE scalable-vector
  // equivalents. These are used for fixed-width types that are mapped to SVE
  // (see useSVEForFixedLengthVectorVT).
  SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  // Reductions: strict (sequential) FADD, predicate reductions, and the
  // generic opcode-parameterized form.
  SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMStoreToSVE(SDValue Op,
                                            SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
                                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthExtractVectorElt(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthInsertVectorElt(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthBitcastToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthConcatVectorsToSVE(SDValue Op,
                                             SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthIntToFPToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVECTOR_SHUFFLEToSVE(SDValue Op,
                                              SelectionDAG &DAG) const;
1022
  // TargetLowering optimization hooks: pow2 signed division expansion and
  // estimate sequences for approximate sqrt/reciprocal codegen.
  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;
  SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                          int &ExtraSteps, bool &UseOneConst,
                          bool Reciprocal) const override;
  SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                           int &ExtraSteps) const override;
  SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
                           const DenormalMode &Mode) const override;
  SDValue getSqrtResultForDenormInput(SDValue Operand,
                                      SelectionDAG &DAG) const override;
  unsigned combineRepeatedFPDivisors() const override;
1035
  // Inline-assembly constraint support.
  ConstraintType getConstraintType(StringRef Constraint) const override;
  Register getRegisterByName(const char *RegName, LLT VT,
                             const MachineFunction &MF) const override;

  /// Examine constraint string and operand type and determine a weight value.
  /// The operand object must already have been set up with the operand type.
  ConstraintWeight
  getSingleConstraintMatchWeight(AsmOperandInfo &info,
                                 const char *constraint) const override;

  // Maps a register constraint to a (register number, register class) pair.
  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;

  const char *LowerXConstraint(EVT ConstraintVT) const override;

  // Populates Ops with the DAG values that satisfy Constraint for operand Op.
  void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;
1055
getInlineAsmMemConstraint(StringRef ConstraintCode)1056 unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
1057 if (ConstraintCode == "Q")
1058 return InlineAsm::Constraint_Q;
1059 // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
1060 // followed by llvm_unreachable so we'll leave them unimplemented in
1061 // the backend for now.
1062 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
1063 }
1064
  // Target queries consulted by DAG combines and selection.
  bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override;
  bool shouldRemoveExtendFromGSIndex(EVT VT) const override;
  bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
  bool mayBeEmittedAsTailCall(const CallInst *CI) const override;

  // Pre/post-indexed addressing-mode support. getIndexedAddressParts is not
  // an override — presumably the shared helper behind the two overrides
  // below (its definition is not visible here).
  bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
                              ISD::MemIndexedMode &AM, bool &IsInc,
                              SelectionDAG &DAG) const;
  bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
                                 ISD::MemIndexedMode &AM,
                                 SelectionDAG &DAG) const override;
  bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                                  SDValue &Offset, ISD::MemIndexedMode &AM,
                                  SelectionDAG &DAG) const override;

  // Custom legalization of illegal result types, plus node-specific helpers.
  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;
  void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                             SelectionDAG &DAG) const;
  void ReplaceExtractSubVectorResults(SDNode *N,
                                      SmallVectorImpl<SDValue> &Results,
                                      SelectionDAG &DAG) const;

  bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;

  void finalizeLowering(MachineFunction &MF) const override;

  bool shouldLocalize(const MachineInstr &MI,
                      const TargetTransformInfo *TTI) const override;

  // Target hook for simplifying demanded bits of AArch64-specific nodes.
  bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                         const APInt &OriginalDemandedBits,
                                         const APInt &OriginalDemandedElts,
                                         KnownBits &Known,
                                         TargetLoweringOpt &TLO,
                                         unsigned Depth) const override;
1101
  // Normally SVE is only used for byte size vectors that do not fit within a
  // NEON vector. This changes when OverrideNEON is true, allowing SVE to be
  // used for 64bit and 128bit vectors as well.
  bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;

  // With the exception of data-predicate transitions, no instructions are
  // required to cast between legal scalable vector types. However:
  //  1. Packed and unpacked types have different bit lengths, meaning BITCAST
  //     is not universally useable.
  //  2. Most unpacked integer types are not legal and thus integer extends
  //     cannot be used to convert between unpacked and packed types.
  // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used
  // to transition between unpacked and packed types of the same element type,
  // with BITCAST used otherwise.
  SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;

  // NOTE: the "Extact" spelling is part of the inherited interface — it must
  // match the base-class declaration, so it is not corrected here.
  bool isConstantUnsignedBitfieldExtactLegal(unsigned Opc, LLT Ty1,
                                             LLT Ty2) const override;
1120 };
1121
namespace AArch64 {
/// Factory for the AArch64 FastISel instance used by fast instruction
/// selection (defined elsewhere in the backend).
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                         const TargetLibraryInfo *libInfo);
} // end namespace AArch64
1126
1127 } // end namespace llvm
1128
1129 #endif
1130