//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that AArch64 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H

#include "AArch64.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Instruction.h"

namespace llvm {

namespace AArch64ISD {

// For predicated nodes where the result is a vector, the operation is
// controlled by a governing predicate and the inactive lanes are explicitly
// defined with a value, please stick to the following naming convention:
//
//    _MERGE_OP<n>        The result value is a vector with inactive lanes equal
//                        to source operand OP<n>.
//
//    _MERGE_ZERO         The result value is a vector with inactive lanes
//                        actively zeroed.
//
//    _MERGE_PASSTHRU     The result value is a vector with inactive lanes equal
//                        to the last source operand, whose only purpose is to
//                        act as a passthru value.
//
// For other cases where no explicit action is needed to set the inactive lanes,
// or when the result is not a vector and it is needed or helpful to
// distinguish a node from similar unpredicated nodes, use:
//
//    _PRED
//
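// As an illustrative sketch of the convention (the operand order shown here
// is only an assumption for the example, not a contract):
//
//    FADD_PRED(Pg, Zn, Zm)            - inactive lanes undefined
//    FABS_MERGE_PASSTHRU(Pg, Zn, Zd)  - inactive lanes taken from Zd
//    SETCC_MERGE_ZERO(Pg, Zn, Zm, CC) - inactive lanes zeroed
//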
enum NodeType : unsigned {
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
  CALL,         // Function call.

  // Pseudo for an ObjC call that gets emitted together with a special `mov
  // x29, x29` marker instruction.
  CALL_RVMARKER,

  // Produces the full sequence of instructions for getting the thread pointer
  // offset of a variable into X0, using the TLSDesc model.
  TLSDESC_CALLSEQ,
  ADRP,     // Page address of a TargetGlobalAddress operand.
  ADR,      // ADR
  ADDlow,   // Add the low 12 bits of a TargetGlobalAddress operand.
  LOADgot,  // Load from automatically generated descriptor (e.g. Global
            // Offset Table, TLS record).
  RET_FLAG, // Return with a flag operand. Operand 0 is the chain operand.
  BRCOND,   // Conditional branch instruction; "b.cond".
  CSEL,
  CSINV, // Conditional select invert.
  CSNEG, // Conditional select negate.
  CSINC, // Conditional select increment.

  // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
  // ELF.
  THREAD_POINTER,
  ADC,
  SBC, // adc, sbc instructions

  // Predicated instructions where inactive lanes produce undefined results.
  ADD_PRED,
  FADD_PRED,
  FDIV_PRED,
  FMA_PRED,
  FMAXNM_PRED,
  FMINNM_PRED,
  FMAX_PRED,
  FMIN_PRED,
  FMUL_PRED,
  FSUB_PRED,
  MUL_PRED,
  MULHS_PRED,
  MULHU_PRED,
  SDIV_PRED,
  SHL_PRED,
  SMAX_PRED,
  SMIN_PRED,
  SRA_PRED,
  SRL_PRED,
  SUB_PRED,
  UDIV_PRED,
  UMAX_PRED,
  UMIN_PRED,

  // Unpredicated vector instructions
  BIC,

  // Predicated instructions with the result of inactive lanes provided by the
  // last operand.
  FABS_MERGE_PASSTHRU,
  FCEIL_MERGE_PASSTHRU,
  FFLOOR_MERGE_PASSTHRU,
  FNEARBYINT_MERGE_PASSTHRU,
  FNEG_MERGE_PASSTHRU,
  FRECPX_MERGE_PASSTHRU,
  FRINT_MERGE_PASSTHRU,
  FROUND_MERGE_PASSTHRU,
  FROUNDEVEN_MERGE_PASSTHRU,
  FSQRT_MERGE_PASSTHRU,
  FTRUNC_MERGE_PASSTHRU,
  FP_ROUND_MERGE_PASSTHRU,
  FP_EXTEND_MERGE_PASSTHRU,
  UINT_TO_FP_MERGE_PASSTHRU,
  SINT_TO_FP_MERGE_PASSTHRU,
  FCVTZU_MERGE_PASSTHRU,
  FCVTZS_MERGE_PASSTHRU,
  SIGN_EXTEND_INREG_MERGE_PASSTHRU,
  ZERO_EXTEND_INREG_MERGE_PASSTHRU,
  ABS_MERGE_PASSTHRU,
  NEG_MERGE_PASSTHRU,

  SETCC_MERGE_ZERO,

  // Arithmetic instructions which write flags.
  ADDS,
  SUBS,
  ADCS,
  SBCS,
  ANDS,

  // Conditional compares. Operands: left, right, falsecc, cc, flags.
  CCMP,
  CCMN,
  FCCMP,

  // Floating point comparison
  FCMP,

  // Scalar extract
  EXTR,

  // Scalar-to-vector duplication
  DUP,
  DUPLANE8,
  DUPLANE16,
  DUPLANE32,
  DUPLANE64,

  // Vector immediate moves
  MOVI,
  MOVIshift,
  MOVIedit,
  MOVImsl,
  FMOV,
  MVNIshift,
  MVNImsl,

  // Vector immediate ops
  BICi,
  ORRi,

  // Vector bitwise select: similar to ISD::VSELECT but not all bits within an
  // element must be identical.
  BSP,

  // Vector shuffles
  ZIP1,
  ZIP2,
  UZP1,
  UZP2,
  TRN1,
  TRN2,
  REV16,
  REV32,
  REV64,
  EXT,
  SPLICE,

  // Vector shift by scalar
  VSHL,
  VLSHR,
  VASHR,

  // Vector shift by scalar (again)
  SQSHL_I,
  UQSHL_I,
  SQSHLU_I,
  SRSHR_I,
  URSHR_I,

  // Vector shift by constant and insert
  VSLI,
  VSRI,

  // Vector comparisons
  CMEQ,
  CMGE,
  CMGT,
  CMHI,
  CMHS,
  FCMEQ,
  FCMGE,
  FCMGT,

  // Vector zero comparisons
  CMEQz,
  CMGEz,
  CMGTz,
  CMLEz,
  CMLTz,
  FCMEQz,
  FCMGEz,
  FCMGTz,
  FCMLEz,
  FCMLTz,

  // Vector across-lanes addition
  // Only the lower result lane is defined.
  SADDV,
  UADDV,

  // Vector halving addition
  SHADD,
  UHADD,

  // Vector rounding halving addition
  SRHADD,
  URHADD,

  // Unsigned Add Long Pairwise
  UADDLP,

  // udot/sdot instructions
  UDOT,
  SDOT,

  // Vector across-lanes min/max
  // Only the lower result lane is defined.
  SMINV,
  UMINV,
  SMAXV,
  UMAXV,

  SADDV_PRED,
  UADDV_PRED,
  SMAXV_PRED,
  UMAXV_PRED,
  SMINV_PRED,
  UMINV_PRED,
  ORV_PRED,
  EORV_PRED,
  ANDV_PRED,

  // Vector bitwise insertion
  BIT,

  // Compare-and-branch
  CBZ,
  CBNZ,
  TBZ,
  TBNZ,

  // Tail calls
  TC_RETURN,

  // Custom prefetch handling
  PREFETCH,

  // {s|u}int to FP within a FP register.
  SITOF,
  UITOF,

  /// Natural vector cast. ISD::BITCAST is not natural in the big-endian
  /// world w.r.t. vectors, which causes additional REV instructions to be
  /// generated to compensate for the byte-swapping. But sometimes we do
  /// need to re-interpret the data in SIMD vector registers in big-endian
  /// mode without emitting such REV instructions.
  NVCAST,
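
  // For example (an illustrative sketch, not a guarantee about the exact
  // lowering): on a big-endian target, (v4i32 (bitcast (v2i64 X))) must
  // byte-swap lanes to preserve ISD::BITCAST's store/load-equivalent
  // semantics, whereas (v4i32 (NVCAST (v2i64 X))) simply re-labels the
  // register contents and emits no REV instructions.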

  MRS, // MRS, also sets the flags via a glue.

  SMULL,
  UMULL,

  // Reciprocal estimates and steps.
  FRECPE,
  FRECPS,
  FRSQRTE,
  FRSQRTS,

  SUNPKHI,
  SUNPKLO,
  UUNPKHI,
  UUNPKLO,

  CLASTA_N,
  CLASTB_N,
  LASTA,
  LASTB,
  TBL,

  // Floating-point reductions.
  FADDA_PRED,
  FADDV_PRED,
  FMAXV_PRED,
  FMAXNMV_PRED,
  FMINV_PRED,
  FMINNMV_PRED,

  INSR,
  PTEST,
  PTRUE,

  BITREVERSE_MERGE_PASSTHRU,
  BSWAP_MERGE_PASSTHRU,
  CTLZ_MERGE_PASSTHRU,
  CTPOP_MERGE_PASSTHRU,
  DUP_MERGE_PASSTHRU,
  INDEX_VECTOR,

  // Cast between vectors of the same element type that differ in length.
  REINTERPRET_CAST,

  // Nodes to build an LD64B / ST64B 64-bit quantity out of i64, and vice versa
  LS64_BUILD,
  LS64_EXTRACT,

  LD1_MERGE_ZERO,
  LD1S_MERGE_ZERO,
  LDNF1_MERGE_ZERO,
  LDNF1S_MERGE_ZERO,
  LDFF1_MERGE_ZERO,
  LDFF1S_MERGE_ZERO,
  LD1RQ_MERGE_ZERO,
  LD1RO_MERGE_ZERO,

  // Structured loads.
  SVE_LD2_MERGE_ZERO,
  SVE_LD3_MERGE_ZERO,
  SVE_LD4_MERGE_ZERO,

  // Unsigned gather loads.
  GLD1_MERGE_ZERO,
  GLD1_SCALED_MERGE_ZERO,
  GLD1_UXTW_MERGE_ZERO,
  GLD1_SXTW_MERGE_ZERO,
  GLD1_UXTW_SCALED_MERGE_ZERO,
  GLD1_SXTW_SCALED_MERGE_ZERO,
  GLD1_IMM_MERGE_ZERO,

  // Signed gather loads.
  GLD1S_MERGE_ZERO,
  GLD1S_SCALED_MERGE_ZERO,
  GLD1S_UXTW_MERGE_ZERO,
  GLD1S_SXTW_MERGE_ZERO,
  GLD1S_UXTW_SCALED_MERGE_ZERO,
  GLD1S_SXTW_SCALED_MERGE_ZERO,
  GLD1S_IMM_MERGE_ZERO,

  // Unsigned first-faulting gather loads.
  GLDFF1_MERGE_ZERO,
  GLDFF1_SCALED_MERGE_ZERO,
  GLDFF1_UXTW_MERGE_ZERO,
  GLDFF1_SXTW_MERGE_ZERO,
  GLDFF1_UXTW_SCALED_MERGE_ZERO,
  GLDFF1_SXTW_SCALED_MERGE_ZERO,
  GLDFF1_IMM_MERGE_ZERO,

  // Signed first-faulting gather loads.
  GLDFF1S_MERGE_ZERO,
  GLDFF1S_SCALED_MERGE_ZERO,
  GLDFF1S_UXTW_MERGE_ZERO,
  GLDFF1S_SXTW_MERGE_ZERO,
  GLDFF1S_UXTW_SCALED_MERGE_ZERO,
  GLDFF1S_SXTW_SCALED_MERGE_ZERO,
  GLDFF1S_IMM_MERGE_ZERO,

  // Non-temporal gather loads
  GLDNT1_MERGE_ZERO,
  GLDNT1_INDEX_MERGE_ZERO,
  GLDNT1S_MERGE_ZERO,

  // Contiguous masked store.
  ST1_PRED,

  // Scatter store
  SST1_PRED,
  SST1_SCALED_PRED,
  SST1_UXTW_PRED,
  SST1_SXTW_PRED,
  SST1_UXTW_SCALED_PRED,
  SST1_SXTW_SCALED_PRED,
  SST1_IMM_PRED,

  // Non-temporal scatter store
  SSTNT1_PRED,
  SSTNT1_INDEX_PRED,

  // Asserts that a function argument (i32) is zero-extended to i8 by
  // the caller.
  ASSERT_ZEXT_BOOL,

  // Strict (exception-raising) floating point comparison
  STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
  STRICT_FCMPE,

  // NEON Load/Store with post-increment base updates
  LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
  LD3post,
  LD4post,
  ST2post,
  ST3post,
  ST4post,
  LD1x2post,
  LD1x3post,
  LD1x4post,
  ST1x2post,
  ST1x3post,
  ST1x4post,
  LD1DUPpost,
  LD2DUPpost,
  LD3DUPpost,
  LD4DUPpost,
  LD1LANEpost,
  LD2LANEpost,
  LD3LANEpost,
  LD4LANEpost,
  ST2LANEpost,
  ST3LANEpost,
  ST4LANEpost,

  STG,
  STZG,
  ST2G,
  STZ2G,

  LDP,
  STP,
  STNP,
};

} // end namespace AArch64ISD

namespace {

// Any instruction that defines a 32-bit result zeros out the high half of the
// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
// be copying from a truncate. But any other 32-bit operation will zero-extend
// up to 64 bits. AssertSext/AssertZext aren't saying anything about the upper
// 32 bits, they're probably just qualifying a CopyFromReg.
static inline bool isDef32(const SDNode &N) {
  unsigned Opc = N.getOpcode();
  return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
         Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
         Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
         Opc != ISD::FREEZE;
}
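
// A sketch of how this helper is typically consumed: a TableGen PatLeaf can
// wrap it so that (i64 (zext i32)) patterns fold to a plain SUBREG_TO_REG
// when the 32-bit value already zeros the upper bits. The pattern name below
// is assumed for illustration:
//
//   def def32 : PatLeaf<(i32 GPR32:$src), [{ return isDef32(*N); }]>;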

} // end anonymous namespace

namespace AArch64 {
/// Possible values of current rounding mode, which is specified in bits
/// 23:22 of FPCR.
enum Rounding {
  RN = 0,    // Round to Nearest
  RP = 1,    // Round towards Plus infinity
  RM = 2,    // Round towards Minus infinity
  RZ = 3,    // Round towards Zero
  rmMask = 3 // Bit mask selecting rounding mode
};

// Bit position of rounding mode bits in FPCR.
const unsigned RoundingBitsPos = 22;
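
// For example, a sketch of decoding the mode from a raw FPCR value (FPCRValue
// is a hypothetical uint64_t holding a value read via MRS):
//
//   Rounding RM =
//       static_cast<Rounding>((FPCRValue >> RoundingBitsPos) & rmMask);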
} // namespace AArch64

class AArch64Subtarget;
class AArch64TargetMachine;

class AArch64TargetLowering : public TargetLowering {
public:
  explicit AArch64TargetLowering(const TargetMachine &TM,
                                 const AArch64Subtarget &STI);

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;

  /// Determine which of the bits specified in Mask are known to be either zero
  /// or one and return them in the KnownZero/KnownOne bitsets.
  void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
                                     const APInt &DemandedElts,
                                     const SelectionDAG &DAG,
                                     unsigned Depth = 0) const override;

  MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
    // Returning i64 unconditionally here (i.e. even for ILP32) means that the
    // *DAG* representation of pointers will always be 64-bits. They will be
    // truncated and extended when transferred to memory, but the 64-bit DAG
    // allows us to use AArch64's addressing modes much more easily.
    return MVT::getIntegerVT(64);
  }

  bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                    const APInt &DemandedElts,
                                    TargetLoweringOpt &TLO) const override;

  MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;

  /// Returns true if the target allows unaligned memory accesses of the
  /// specified type.
  bool allowsMisalignedMemoryAccesses(
      EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      bool *Fast = nullptr) const override;
  /// LLT variant.
  bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
                                      Align Alignment,
                                      MachineMemOperand::Flags Flags,
                                      bool *Fast = nullptr) const override;

  /// Provide custom lowering hooks for some operations.
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

  const char *getTargetNodeName(unsigned Opcode) const override;

  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

  /// This method returns a target specific FastISel object, or null if the
  /// target does not support "fast" ISel.
  FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                           const TargetLibraryInfo *libInfo) const override;

  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;

  bool isFPImmLegal(const APFloat &Imm, EVT VT,
                    bool ForCodeSize) const override;

  /// Return true if the given shuffle mask can be codegen'd directly, or if it
  /// should be stack expanded.
  bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;

  /// Return the ISD::SETCC ValueType.
  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                         EVT VT) const override;

  SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;

  MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
                                  MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                         MachineBasicBlock *BB) const;

  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI,
                              MachineBasicBlock *MBB) const override;

  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                          MachineFunction &MF,
                          unsigned Intrinsic) const override;

  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
                             EVT NewVT) const override;

  bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
  bool isTruncateFree(EVT VT1, EVT VT2) const override;

  bool isProfitableToHoist(Instruction *I) const override;

  bool isZExtFree(Type *Ty1, Type *Ty2) const override;
  bool isZExtFree(EVT VT1, EVT VT2) const override;
  bool isZExtFree(SDValue Val, EVT VT2) const override;

  bool shouldSinkOperands(Instruction *I,
                          SmallVectorImpl<Use *> &Ops) const override;

  bool hasPairedLoad(EVT LoadedType, Align &RequiredAlignment) const override;

  unsigned getMaxSupportedInterleaveFactor() const override { return 4; }

  bool lowerInterleavedLoad(LoadInst *LI,
                            ArrayRef<ShuffleVectorInst *> Shuffles,
                            ArrayRef<unsigned> Indices,
                            unsigned Factor) const override;
  bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                             unsigned Factor) const override;

  bool isLegalAddImmediate(int64_t) const override;
  bool isLegalICmpImmediate(int64_t) const override;

  bool isMulAddWithConstProfitable(const SDValue &AddNode,
                                   const SDValue &ConstNode) const override;

  bool shouldConsiderGEPOffsetSplit() const override;

  EVT getOptimalMemOpType(const MemOp &Op,
                          const AttributeList &FuncAttributes) const override;

  LLT getOptimalMemOpLLT(const MemOp &Op,
                         const AttributeList &FuncAttributes) const override;

  /// Return true if the addressing mode represented by AM is legal for this
  /// target, for a load/store of the specified type.
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                             unsigned AS,
                             Instruction *I = nullptr) const override;

  /// Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  InstructionCost getScalingFactorCost(const DataLayout &DL, const AddrMode &AM,
                                       Type *Ty, unsigned AS) const override;

  /// Return true if an FMA operation is faster than a pair of fmul and fadd
  /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
  /// returns true, otherwise fmuladd is expanded to fmul + fadd.
  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                  EVT VT) const override;
  bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;

  bool generateFMAsInMachineCombiner(EVT VT,
                                     CodeGenOpt::Level OptLevel) const override;

  const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;

  /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
  bool isDesirableToCommuteWithShift(const SDNode *N,
                                     CombineLevel Level) const override;

  /// Returns true if it is beneficial to convert a load of a constant
  /// to just the constant itself.
  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                         Type *Ty) const override;

  /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
  /// with this index.
  bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                               unsigned Index) const override;

  bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                            bool MathUsed) const override {
    // Using overflow ops for overflow checks only should be beneficial on
    // AArch64.
    return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
  }

  Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
                        AtomicOrdering Ord) const override;
  Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
                              AtomicOrdering Ord) const override;

  void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override;

  bool isOpSuitableForLDPSTP(const Instruction *I) const;
  bool shouldInsertFencesForAtomic(const Instruction *I) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
  bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;

  bool useLoadStackGuardNode() const override;
  TargetLoweringBase::LegalizeTypeAction
  getPreferredVectorAction(MVT VT) const override;

  /// If the target has a standard location for the stack protector cookie,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getIRStackGuard(IRBuilderBase &IRB) const override;

  void insertSSPDeclarations(Module &M) const override;
  Value *getSDagStackGuard(const Module &M) const override;
  Function *getSSPStackGuardCheck(const Module &M) const override;

  /// If the target has a standard location for the unsafe stack pointer,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;

  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  Register
  getExceptionPointerRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X0;
  }

  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  Register
  getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X1;
  }

  bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

  bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                        const MachineFunction &MF) const override {
    // Do not merge to a float value size (128 bits) if the function has the
    // noimplicitfloat attribute.

    bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);

    if (NoFloat)
      return (MemVT.getSizeInBits() <= 64);
    return true;
  }

  bool isCheapToSpeculateCttz() const override {
    return true;
  }

  bool isCheapToSpeculateCtlz() const override {
    return true;
  }

  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

  bool hasAndNotCompare(SDValue V) const override {
    // We can use bics for any scalar.
    return V.getValueType().isScalarInteger();
  }

  bool hasAndNot(SDValue Y) const override {
    EVT VT = Y.getValueType();

    if (!VT.isVector())
      return hasAndNotCompare(Y);

    return VT.getSizeInBits() >= 64; // vector 'bic'
  }

  bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
      SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
      unsigned OldShiftOpcode, unsigned NewShiftOpcode,
      SelectionDAG &DAG) const override;

  bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;

  bool shouldTransformSignedTruncationCheck(EVT XVT,
                                            unsigned KeptBits) const override {
    // For vectors, we don't have a preference.
    if (XVT.isVector())
      return false;

    auto VTIsOk = [](EVT VT) -> bool {
      return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
             VT == MVT::i64;
    };

    // We are ok with KeptBitsVT being byte/word/dword, what SXT supports.
    // XVT will be larger than KeptBitsVT.
    MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
    return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
  }

  bool preferIncOfAddToSubOfNot(EVT VT) const override;

  bool hasBitPreservingFPLogic(EVT VT) const override {
    // FIXME: Is this always true? It should be true for vectors at least.
    return VT == MVT::f32 || VT == MVT::f64;
  }

  bool supportSplitCSR(MachineFunction *MF) const override {
    return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
  }
  void initializeSplitCSR(MachineBasicBlock *Entry) const override;
  void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

  bool supportSwiftError() const override {
    return true;
  }

  /// Enable aggressive FMA fusion on targets that want it.
  bool enableAggressiveFMAFusion(EVT VT) const override;

  /// Returns the size of the platform's va_list object.
  unsigned getVaListSizeInBits(const DataLayout &DL) const override;

  /// Returns true if \p VecTy is a legal interleaved access type. This
  /// function checks the vector element type and the overall width of the
  /// vector.
  bool isLegalInterleavedAccessType(VectorType *VecTy,
                                    const DataLayout &DL) const;

  /// Returns the number of interleaved accesses that will be generated when
  /// lowering accesses of the given type.
  unsigned getNumInterleavedAccesses(VectorType *VecTy,
                                     const DataLayout &DL) const;

  MachineMemOperand::Flags getTargetMMOFlags(
      const Instruction &I) const override;

  bool functionArgumentNeedsConsecutiveRegisters(
      Type *Ty, CallingConv::ID CallConv, bool isVarArg,
      const DataLayout &DL) const override;

  /// Used for exception handling on Win64.
  bool needsFixedCatchObjects() const override;

  bool fallBackToDAGISel(const Instruction &Inst) const override;

  /// SVE code generation for fixed length vectors does not custom lower
  /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to
  /// merge. However, merging them creates a BUILD_VECTOR that is just as
  /// illegal as the original, thus leading to an infinite legalisation loop.
  /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
  /// vector types this override can be removed.
  bool mergeStoresAfterLegalization(EVT VT) const override;

  // If the platform/function should have a redzone, return the size in bytes.
  unsigned getRedZoneSize(const Function &F) const {
    if (F.hasFnAttribute(Attribute::NoRedZone))
      return 0;
    return 128;
  }

  bool isAllActivePredicate(SDValue N) const;
  EVT getPromotedVTForPredicate(EVT VT) const;

  EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
                             bool AllowUnknown = false) const override;

private:
  /// Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

  bool isExtFreeImpl(const Instruction *Ext) const override;

  void addTypeForNEON(MVT VT);
  void addTypeForFixedLengthSVE(MVT VT);
  void addDRTypeForNEON(MVT VT);
  void addQRTypeForNEON(MVT VT);

  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               const SDLoc &DL, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerCall(CallLoweringInfo & /*CLI*/,
                    SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                          CallingConv::ID CallConv, bool isVarArg,
                          const SmallVectorImpl<ISD::InputArg> &Ins,
                          const SDLoc &DL, SelectionDAG &DAG,
                          SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
                          SDValue ThisVal) const;

  SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;

  bool isEligibleForTailCallOptimization(
      SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
      const SmallVectorImpl<ISD::OutputArg> &Outs,
      const SmallVectorImpl<SDValue> &OutVals,
      const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;

  /// Finds the incoming stack arguments which overlap the given fixed stack
  /// object and incorporates their load into the current chain. This prevents
  /// an upcoming store from clobbering the stack argument before it's used.
  SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
                              MachineFrameInfo &MFI, int ClobberedFI) const;

  bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;

  void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
                           SDValue &Chain) const;

  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                      bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      LLVMContext &Context) const override;

  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
                      SelectionDAG &DAG) const override;

  SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  template <class NodeTy>
  SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase,
                               const SDLoc &DL, SelectionDAG &DAG) const;
  SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
                                 SelectionDAG &DAG) const;
  SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
                         SDValue TVal, SDValue FVal, const SDLoc &dl,
                         SelectionDAG &DAG) const;
  SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, unsigned NewOp,
                              bool OverrideNEON = false) const;
  SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain,
                                         SDValue &Size,
                                         SelectionDAG &DAG) const;
  SDValue LowerSVEStructLoad(unsigned Intrinsic, ArrayRef<SDValue> LoadOps,
                             EVT VT, SelectionDAG &DAG, const SDLoc &DL) const;

  SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMStoreToSVE(SDValue Op,
                                            SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
                                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthExtractVectorElt(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthInsertVectorElt(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthBitcastToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthConcatVectorsToSVE(SDValue Op,
                                             SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthIntToFPToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVECTOR_SHUFFLEToSVE(SDValue Op,
                                              SelectionDAG &DAG) const;

  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;
  SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                          int &ExtraSteps, bool &UseOneConst,
                          bool Reciprocal) const override;
  SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                           int &ExtraSteps) const override;
  SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
                           const DenormalMode &Mode) const override;
  SDValue getSqrtResultForDenormInput(SDValue Operand,
                                      SelectionDAG &DAG) const override;
  unsigned combineRepeatedFPDivisors() const override;

  ConstraintType getConstraintType(StringRef Constraint) const override;
  Register getRegisterByName(const char* RegName, LLT VT,
                             const MachineFunction &MF) const override;

  /// Examine constraint string and operand type and determine a weight value.
  /// The operand object must already have been set up with the operand type.
  ConstraintWeight
  getSingleConstraintMatchWeight(AsmOperandInfo &info,
                                 const char *constraint) const override;

  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;

  const char *LowerXConstraint(EVT ConstraintVT) const override;

  void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;

  unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
    if (ConstraintCode == "Q")
      return InlineAsm::Constraint_Q;
    // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
    // followed by llvm_unreachable so we'll leave them unimplemented in
    // the backend for now.
    return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
  }

  bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override;
  bool shouldRemoveExtendFromGSIndex(EVT VT) const override;
  bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
  bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
  bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
                              ISD::MemIndexedMode &AM, bool &IsInc,
                              SelectionDAG &DAG) const;
  bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
                                 ISD::MemIndexedMode &AM,
                                 SelectionDAG &DAG) const override;
  bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                                  SDValue &Offset, ISD::MemIndexedMode &AM,
                                  SelectionDAG &DAG) const override;

  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;
  void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                             SelectionDAG &DAG) const;
  void ReplaceExtractSubVectorResults(SDNode *N,
                                      SmallVectorImpl<SDValue> &Results,
                                      SelectionDAG &DAG) const;

  bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;

  void finalizeLowering(MachineFunction &MF) const override;

  bool shouldLocalize(const MachineInstr &MI,
                      const TargetTransformInfo *TTI) const override;

  bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                         const APInt &OriginalDemandedBits,
                                         const APInt &OriginalDemandedElts,
                                         KnownBits &Known,
                                         TargetLoweringOpt &TLO,
                                         unsigned Depth) const override;

  // Normally SVE is only used for vectors that are too big to fit within a
  // NEON vector. This changes when OverrideNEON is true, allowing SVE to be
  // used for 64-bit and 128-bit vectors as well.
  bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;

  // With the exception of data-predicate transitions, no instructions are
  // required to cast between legal scalable vector types. However:
  //  1. Packed and unpacked types have different bit lengths, meaning BITCAST
  //     is not universally usable.
  //  2. Most unpacked integer types are not legal and thus integer extends
  //     cannot be used to convert between unpacked and packed types.
  // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used
  // to transition between unpacked and packed types of the same element type,
  // with BITCAST used otherwise.
  SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;
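
  // For example (an illustrative sketch of the rule above, not an exhaustive
  // description of the lowering): casting nxv2i32 (unpacked) to nxv4i32
  // (packed) keeps the i32 element type and so uses REINTERPRET_CAST, while
  // casting nxv4i32 to nxv2i64 changes the element type and so uses a plain
  // BITCAST; a cast such as nxv2i32 -> nxv2i64 may need both steps.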

  bool isConstantUnsignedBitfieldExtactLegal(unsigned Opc, LLT Ty1,
                                             LLT Ty2) const override;
};

namespace AArch64 {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                         const TargetLibraryInfo *libInfo);
} // end namespace AArch64

} // end namespace llvm

#endif