1 //==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that AArch64 uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
15 #define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
16
17 #include "AArch64.h"
18 #include "llvm/CodeGen/CallingConvLower.h"
19 #include "llvm/CodeGen/SelectionDAG.h"
20 #include "llvm/CodeGen/TargetLowering.h"
21 #include "llvm/IR/CallingConv.h"
22 #include "llvm/IR/Instruction.h"
23
24 namespace llvm {
25
26 namespace AArch64ISD {
27
// For predicated nodes where the result is a vector, the operation is
// controlled by a governing predicate and the inactive lanes are explicitly
// defined with a value; please stick to the following naming convention:
//
//    _MERGE_OP<n>        The result value is a vector with inactive lanes equal
//                        to source operand OP<n>.
//
//    _MERGE_ZERO         The result value is a vector with inactive lanes
//                        actively zeroed.
//
//    _MERGE_PASSTHRU     The result value is a vector with inactive lanes equal
//                        to the last source operand, whose only purpose is to
//                        act as a passthru value.
//
// For other cases where no explicit action is needed to set the inactive lanes,
// or when the result is not a vector and it is needed or helpful to
// distinguish a node from similar unpredicated nodes, use:
//
//    _PRED
//
// AArch64-specific SelectionDAG node kinds. Target nodes start after the
// generic ISD opcodes; the STRICT_* and post-increment memory groups are
// anchored at the dedicated target opcode ranges (see below).
enum NodeType : unsigned {
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
  CALL,         // Function call.

  // Produces the full sequence of instructions for getting the thread pointer
  // offset of a variable into X0, using the TLSDesc model.
  TLSDESC_CALLSEQ,
  ADRP,     // Page address of a TargetGlobalAddress operand.
  ADR,      // ADR
  ADDlow,   // Add the low 12 bits of a TargetGlobalAddress operand.
  LOADgot,  // Load from automatically generated descriptor (e.g. Global
            // Offset Table, TLS record).
  RET_FLAG, // Return with a flag operand. Operand 0 is the chain operand.
  BRCOND,   // Conditional branch instruction; "b.cond".
  CSEL,
  FCSEL, // Conditional move instruction.
  CSINV, // Conditional select invert.
  CSNEG, // Conditional select negate.
  CSINC, // Conditional select increment.

  // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
  // ELF.
  THREAD_POINTER,
  ADC,
  SBC, // adc, sbc instructions

  // Predicated instructions where inactive lanes produce undefined results.
  ADD_PRED,
  FADD_PRED,
  FDIV_PRED,
  FMA_PRED,
  FMAXNM_PRED,
  FMINNM_PRED,
  FMUL_PRED,
  FSUB_PRED,
  MUL_PRED,
  SDIV_PRED,
  SHL_PRED,
  SMAX_PRED,
  SMIN_PRED,
  SRA_PRED,
  SRL_PRED,
  SUB_PRED,
  UDIV_PRED,
  UMAX_PRED,
  UMIN_PRED,

  // Predicated instructions with the result of inactive lanes provided by the
  // last operand.
  FABS_MERGE_PASSTHRU,
  FCEIL_MERGE_PASSTHRU,
  FFLOOR_MERGE_PASSTHRU,
  FNEARBYINT_MERGE_PASSTHRU,
  FNEG_MERGE_PASSTHRU,
  FRECPX_MERGE_PASSTHRU,
  FRINT_MERGE_PASSTHRU,
  FROUND_MERGE_PASSTHRU,
  FROUNDEVEN_MERGE_PASSTHRU,
  FSQRT_MERGE_PASSTHRU,
  FTRUNC_MERGE_PASSTHRU,
  FP_ROUND_MERGE_PASSTHRU,
  FP_EXTEND_MERGE_PASSTHRU,
  UINT_TO_FP_MERGE_PASSTHRU,
  SINT_TO_FP_MERGE_PASSTHRU,
  FCVTZU_MERGE_PASSTHRU,
  FCVTZS_MERGE_PASSTHRU,
  SIGN_EXTEND_INREG_MERGE_PASSTHRU,
  ZERO_EXTEND_INREG_MERGE_PASSTHRU,
  ABS_MERGE_PASSTHRU,
  NEG_MERGE_PASSTHRU,

  // Predicated comparison; inactive lanes of the result are zeroed.
  SETCC_MERGE_ZERO,

  // Arithmetic instructions which write flags.
  ADDS,
  SUBS,
  ADCS,
  SBCS,
  ANDS,

  // Conditional compares. Operands: left,right,falsecc,cc,flags
  CCMP,
  CCMN,
  FCCMP,

  // Floating point comparison
  FCMP,

  // Scalar extract
  EXTR,

  // Scalar-to-vector duplication
  DUP,
  DUPLANE8,
  DUPLANE16,
  DUPLANE32,
  DUPLANE64,

  // Vector immediate moves
  MOVI,
  MOVIshift,
  MOVIedit,
  MOVImsl,
  FMOV,
  MVNIshift,
  MVNImsl,

  // Vector immediate ops
  BICi,
  ORRi,

  // Vector bitwise select: similar to ISD::VSELECT but not all bits within an
  // element must be identical.
  BSP,

  // Vector arithmetic negation
  NEG,

  // Vector shuffles
  ZIP1,
  ZIP2,
  UZP1,
  UZP2,
  TRN1,
  TRN2,
  REV16,
  REV32,
  REV64,
  EXT,

  // Vector shift by scalar
  VSHL,
  VLSHR,
  VASHR,

  // Vector saturating/rounding shift by immediate
  SQSHL_I,
  UQSHL_I,
  SQSHLU_I,
  SRSHR_I,
  URSHR_I,

  // Vector shift by constant and insert
  VSLI,
  VSRI,

  // Vector comparisons
  CMEQ,
  CMGE,
  CMGT,
  CMHI,
  CMHS,
  FCMEQ,
  FCMGE,
  FCMGT,

  // Vector zero comparisons
  CMEQz,
  CMGEz,
  CMGTz,
  CMLEz,
  CMLTz,
  FCMEQz,
  FCMGEz,
  FCMGTz,
  FCMLEz,
  FCMLTz,

  // Vector across-lanes addition
  // Only the lower result lane is defined.
  SADDV,
  UADDV,

  // Vector halving addition
  SHADD,
  UHADD,

  // Vector rounding halving addition
  SRHADD,
  URHADD,

  // Absolute difference
  UABD,
  SABD,

  // Vector across-lanes min/max
  // Only the lower result lane is defined.
  SMINV,
  UMINV,
  SMAXV,
  UMAXV,

  // Predicated (SVE) across-lanes reductions.
  SADDV_PRED,
  UADDV_PRED,
  SMAXV_PRED,
  UMAXV_PRED,
  SMINV_PRED,
  UMINV_PRED,
  ORV_PRED,
  EORV_PRED,
  ANDV_PRED,

  // Vector bitwise insertion
  BIT,

  // Compare-and-branch
  CBZ,
  CBNZ,
  TBZ,
  TBNZ,

  // Tail calls
  TC_RETURN,

  // Custom prefetch handling
  PREFETCH,

  // {s|u}int to FP within a FP register.
  SITOF,
  UITOF,

  /// Natural vector cast. ISD::BITCAST is not natural in the big-endian
  /// world w.r.t vectors; which causes additional REV instructions to be
  /// generated to compensate for the byte-swapping. But sometimes we do
  /// need to re-interpret the data in SIMD vector registers in big-endian
  /// mode without emitting such REV instructions.
  NVCAST,

  // Widening multiplies (signed/unsigned).
  SMULL,
  UMULL,

  // Reciprocal estimates and steps.
  FRECPE,
  FRECPS,
  FRSQRTE,
  FRSQRTS,

  // SVE unpacks: extend the high/low half of the source vector
  // (S = sign-extend, U = zero-extend).
  SUNPKHI,
  SUNPKLO,
  UUNPKHI,
  UUNPKLO,

  // SVE element extraction and permutation.
  CLASTA_N,
  CLASTB_N,
  LASTA,
  LASTB,
  REV,
  TBL,

  // Floating-point reductions.
  FADDA_PRED,
  FADDV_PRED,
  FMAXV_PRED,
  FMAXNMV_PRED,
  FMINV_PRED,
  FMINNMV_PRED,

  // SVE vector insert and predicate utilities.
  INSR,
  PTEST,
  PTRUE,

  // Predicated unary operations following the _MERGE_PASSTHRU convention
  // documented above.
  BITREVERSE_MERGE_PASSTHRU,
  BSWAP_MERGE_PASSTHRU,
  CTLZ_MERGE_PASSTHRU,
  CTPOP_MERGE_PASSTHRU,
  DUP_MERGE_PASSTHRU,
  INDEX_VECTOR,

  // Cast between vectors of the same element type that differ in length.
  REINTERPRET_CAST,

  LD1_MERGE_ZERO,
  LD1S_MERGE_ZERO,
  LDNF1_MERGE_ZERO,
  LDNF1S_MERGE_ZERO,
  LDFF1_MERGE_ZERO,
  LDFF1S_MERGE_ZERO,
  LD1RQ_MERGE_ZERO,
  LD1RO_MERGE_ZERO,

  // Structured loads.
  SVE_LD2_MERGE_ZERO,
  SVE_LD3_MERGE_ZERO,
  SVE_LD4_MERGE_ZERO,

  // Unsigned gather loads.
  GLD1_MERGE_ZERO,
  GLD1_SCALED_MERGE_ZERO,
  GLD1_UXTW_MERGE_ZERO,
  GLD1_SXTW_MERGE_ZERO,
  GLD1_UXTW_SCALED_MERGE_ZERO,
  GLD1_SXTW_SCALED_MERGE_ZERO,
  GLD1_IMM_MERGE_ZERO,

  // Signed gather loads
  GLD1S_MERGE_ZERO,
  GLD1S_SCALED_MERGE_ZERO,
  GLD1S_UXTW_MERGE_ZERO,
  GLD1S_SXTW_MERGE_ZERO,
  GLD1S_UXTW_SCALED_MERGE_ZERO,
  GLD1S_SXTW_SCALED_MERGE_ZERO,
  GLD1S_IMM_MERGE_ZERO,

  // Unsigned gather loads (first-faulting).
  GLDFF1_MERGE_ZERO,
  GLDFF1_SCALED_MERGE_ZERO,
  GLDFF1_UXTW_MERGE_ZERO,
  GLDFF1_SXTW_MERGE_ZERO,
  GLDFF1_UXTW_SCALED_MERGE_ZERO,
  GLDFF1_SXTW_SCALED_MERGE_ZERO,
  GLDFF1_IMM_MERGE_ZERO,

  // Signed gather loads (first-faulting).
  GLDFF1S_MERGE_ZERO,
  GLDFF1S_SCALED_MERGE_ZERO,
  GLDFF1S_UXTW_MERGE_ZERO,
  GLDFF1S_SXTW_MERGE_ZERO,
  GLDFF1S_UXTW_SCALED_MERGE_ZERO,
  GLDFF1S_SXTW_SCALED_MERGE_ZERO,
  GLDFF1S_IMM_MERGE_ZERO,

  // Non-temporal gather loads
  GLDNT1_MERGE_ZERO,
  GLDNT1_INDEX_MERGE_ZERO,
  GLDNT1S_MERGE_ZERO,

  // Contiguous masked store.
  ST1_PRED,

  // Scatter store
  SST1_PRED,
  SST1_SCALED_PRED,
  SST1_UXTW_PRED,
  SST1_SXTW_PRED,
  SST1_UXTW_SCALED_PRED,
  SST1_SXTW_SCALED_PRED,
  SST1_IMM_PRED,

  // Non-temporal scatter store
  SSTNT1_PRED,
  SSTNT1_INDEX_PRED,

  // Strict (exception-raising) floating point comparison
  STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
  STRICT_FCMPE,

  // NEON Load/Store with post-increment base updates
  LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
  LD3post,
  LD4post,
  ST2post,
  ST3post,
  ST4post,
  LD1x2post,
  LD1x3post,
  LD1x4post,
  ST1x2post,
  ST1x3post,
  ST1x4post,
  LD1DUPpost,
  LD2DUPpost,
  LD3DUPpost,
  LD4DUPpost,
  LD1LANEpost,
  LD2LANEpost,
  LD3LANEpost,
  LD4LANEpost,
  ST2LANEpost,
  ST3LANEpost,
  ST4LANEpost,

  // Memory Tagging Extension (MTE) tag store instructions.
  STG,
  STZG,
  ST2G,
  STZ2G,

  // Paired load/store; STNP is the non-temporal store variant.
  LDP,
  STP,
  STNP,

  // Pseudo for an Objective-C call that gets emitted together with a special
  // `mov x29, x29` marker instruction.
  CALL_RVMARKER
};
433
434 } // end namespace AArch64ISD
435
436 namespace {
437
438 // Any instruction that defines a 32-bit result zeros out the high half of the
439 // register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
440 // be copying from a truncate. But any other 32-bit operation will zero-extend
441 // up to 64 bits. AssertSext/AssertZext aren't saying anything about the upper
442 // 32 bits, they're probably just qualifying a CopyFromReg.
443 // FIXME: X86 also checks for CMOV here. Do we need something similar?
isDef32(const SDNode & N)444 static inline bool isDef32(const SDNode &N) {
445 unsigned Opc = N.getOpcode();
446 return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
447 Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
448 Opc != ISD::AssertZext;
449 }
450
451 } // end anonymous namespace
452
453 class AArch64Subtarget;
454 class AArch64TargetMachine;
455
456 class AArch64TargetLowering : public TargetLowering {
457 public:
458 explicit AArch64TargetLowering(const TargetMachine &TM,
459 const AArch64Subtarget &STI);
460
461 /// Selects the correct CCAssignFn for a given CallingConvention value.
462 CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
463
464 /// Selects the correct CCAssignFn for a given CallingConvention value.
465 CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;
466
467 /// Determine which of the bits specified in Mask are known to be either zero
468 /// or one and return them in the KnownZero/KnownOne bitsets.
469 void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
470 const APInt &DemandedElts,
471 const SelectionDAG &DAG,
472 unsigned Depth = 0) const override;
473
474 MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
475 // Returning i64 unconditionally here (i.e. even for ILP32) means that the
476 // *DAG* representation of pointers will always be 64-bits. They will be
477 // truncated and extended when transferred to memory, but the 64-bit DAG
478 // allows us to use AArch64's addressing modes much more easily.
479 return MVT::getIntegerVT(64);
480 }
481
482 bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
483 const APInt &DemandedElts,
484 TargetLoweringOpt &TLO) const override;
485
486 MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;
487
488 /// Returns true if the target allows unaligned memory accesses of the
489 /// specified type.
490 bool allowsMisalignedMemoryAccesses(
491 EVT VT, unsigned AddrSpace = 0, unsigned Align = 1,
492 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
493 bool *Fast = nullptr) const override;
494 /// LLT variant.
495 bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
496 Align Alignment,
497 MachineMemOperand::Flags Flags,
498 bool *Fast = nullptr) const override;
499
500 /// Provide custom lowering hooks for some operations.
501 SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
502
503 const char *getTargetNodeName(unsigned Opcode) const override;
504
505 SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
506
507 /// This method returns a target specific FastISel object, or null if the
508 /// target does not support "fast" ISel.
509 FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
510 const TargetLibraryInfo *libInfo) const override;
511
512 bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
513
514 bool isFPImmLegal(const APFloat &Imm, EVT VT,
515 bool ForCodeSize) const override;
516
517 /// Return true if the given shuffle mask can be codegen'd directly, or if it
518 /// should be stack expanded.
519 bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;
520
521 /// Return the ISD::SETCC ValueType.
522 EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
523 EVT VT) const override;
524
525 SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
526
527 MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
528 MachineBasicBlock *BB) const;
529
530 MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
531 MachineBasicBlock *BB) const;
532
533 MachineBasicBlock *
534 EmitInstrWithCustomInserter(MachineInstr &MI,
535 MachineBasicBlock *MBB) const override;
536
537 bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
538 MachineFunction &MF,
539 unsigned Intrinsic) const override;
540
541 bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
542 EVT NewVT) const override;
543
544 bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
545 bool isTruncateFree(EVT VT1, EVT VT2) const override;
546
547 bool isProfitableToHoist(Instruction *I) const override;
548
549 bool isZExtFree(Type *Ty1, Type *Ty2) const override;
550 bool isZExtFree(EVT VT1, EVT VT2) const override;
551 bool isZExtFree(SDValue Val, EVT VT2) const override;
552
553 bool shouldSinkOperands(Instruction *I,
554 SmallVectorImpl<Use *> &Ops) const override;
555
556 bool hasPairedLoad(EVT LoadedType, Align &RequiredAligment) const override;
557
  /// The structured load/store instructions (ld2/ld3/ld4, st2/st3/st4 — see
  /// lowerInterleavedLoad/Store below) handle at most four interleaved
  /// vectors.
  unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
559
560 bool lowerInterleavedLoad(LoadInst *LI,
561 ArrayRef<ShuffleVectorInst *> Shuffles,
562 ArrayRef<unsigned> Indices,
563 unsigned Factor) const override;
564 bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
565 unsigned Factor) const override;
566
567 bool isLegalAddImmediate(int64_t) const override;
568 bool isLegalICmpImmediate(int64_t) const override;
569
570 bool shouldConsiderGEPOffsetSplit() const override;
571
572 EVT getOptimalMemOpType(const MemOp &Op,
573 const AttributeList &FuncAttributes) const override;
574
575 LLT getOptimalMemOpLLT(const MemOp &Op,
576 const AttributeList &FuncAttributes) const override;
577
578 /// Return true if the addressing mode represented by AM is legal for this
579 /// target, for a load/store of the specified type.
580 bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
581 unsigned AS,
582 Instruction *I = nullptr) const override;
583
584 /// Return the cost of the scaling factor used in the addressing
585 /// mode represented by AM for this target, for a load/store
586 /// of the specified type.
587 /// If the AM is supported, the return value must be >= 0.
588 /// If the AM is not supported, it returns a negative value.
589 int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
590 unsigned AS) const override;
591
592 /// Return true if an FMA operation is faster than a pair of fmul and fadd
593 /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
594 /// returns true, otherwise fmuladd is expanded to fmul + fadd.
595 bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
596 EVT VT) const override;
597 bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;
598
599 const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
600
601 /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
602 bool isDesirableToCommuteWithShift(const SDNode *N,
603 CombineLevel Level) const override;
604
605 /// Returns true if it is beneficial to convert a load of a constant
606 /// to just the constant itself.
607 bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
608 Type *Ty) const override;
609
610 /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
611 /// with this index.
612 bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
613 unsigned Index) const override;
614
  /// Decide whether to combine a compare into an overflow op (e.g. UADDO).
  /// \p MathUsed is deliberately ignored: the base implementation is always
  /// queried as if the math result were used, because forming overflow ops
  /// purely for overflow checks is still worthwhile here.
  bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                            bool MathUsed) const override {
    // Using overflow ops for overflow checks only should be beneficial on
    // AArch64.
    return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
  }
621
622 Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
623 AtomicOrdering Ord) const override;
624 Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
625 Value *Addr, AtomicOrdering Ord) const override;
626
627 void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const override;
628
629 TargetLoweringBase::AtomicExpansionKind
630 shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
631 bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
632 TargetLoweringBase::AtomicExpansionKind
633 shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
634
635 TargetLoweringBase::AtomicExpansionKind
636 shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
637
638 bool useLoadStackGuardNode() const override;
639 TargetLoweringBase::LegalizeTypeAction
640 getPreferredVectorAction(MVT VT) const override;
641
642 /// If the target has a standard location for the stack protector cookie,
643 /// returns the address of that location. Otherwise, returns nullptr.
644 Value *getIRStackGuard(IRBuilder<> &IRB) const override;
645
646 void insertSSPDeclarations(Module &M) const override;
647 Value *getSDagStackGuard(const Module &M) const override;
648 Function *getSSPStackGuardCheck(const Module &M) const override;
649
650 /// If the target has a standard location for the unsafe stack pointer,
651 /// returns the address of that location. Otherwise, returns nullptr.
652 Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;
653
  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  /// NOTE(review): X0 is hard-coded for every personality function — the
  /// FIXME below predates any confirmation of this choice.
  Register
  getExceptionPointerRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X0;
  }
661
  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  /// NOTE(review): X1 is hard-coded for every personality function — the
  /// FIXME below predates any confirmation of this choice.
  Register
  getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X1;
  }
669
670 bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
671
canMergeStoresTo(unsigned AddressSpace,EVT MemVT,const SelectionDAG & DAG)672 bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
673 const SelectionDAG &DAG) const override {
674 // Do not merge to float value size (128 bytes) if no implicit
675 // float attribute is set.
676
677 bool NoFloat = DAG.getMachineFunction().getFunction().hasFnAttribute(
678 Attribute::NoImplicitFloat);
679
680 if (NoFloat)
681 return (MemVT.getSizeInBits() <= 64);
682 return true;
683 }
684
  /// Count-trailing-zeros is cheap enough (presumably a short branchless
  /// sequence such as RBIT+CLZ — confirm in instruction selection) to
  /// speculate unconditionally.
  bool isCheapToSpeculateCttz() const override {
    return true;
  }
688
  /// Count-leading-zeros is cheap (presumably a single CLZ — confirm in
  /// instruction selection), so speculating it is always fine.
  bool isCheapToSpeculateCtlz() const override {
    return true;
  }
692
693 bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
694
hasAndNotCompare(SDValue V)695 bool hasAndNotCompare(SDValue V) const override {
696 // We can use bics for any scalar.
697 return V.getValueType().isScalarInteger();
698 }
699
hasAndNot(SDValue Y)700 bool hasAndNot(SDValue Y) const override {
701 EVT VT = Y.getValueType();
702
703 if (!VT.isVector())
704 return hasAndNotCompare(Y);
705
706 return VT.getSizeInBits() >= 64; // vector 'bic'
707 }
708
709 bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
710 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
711 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
712 SelectionDAG &DAG) const override;
713
714 bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;
715
shouldTransformSignedTruncationCheck(EVT XVT,unsigned KeptBits)716 bool shouldTransformSignedTruncationCheck(EVT XVT,
717 unsigned KeptBits) const override {
718 // For vectors, we don't have a preference..
719 if (XVT.isVector())
720 return false;
721
722 auto VTIsOk = [](EVT VT) -> bool {
723 return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
724 VT == MVT::i64;
725 };
726
727 // We are ok with KeptBitsVT being byte/word/dword, what SXT supports.
728 // XVT will be larger than KeptBitsVT.
729 MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
730 return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
731 }
732
733 bool preferIncOfAddToSubOfNot(EVT VT) const override;
734
hasBitPreservingFPLogic(EVT VT)735 bool hasBitPreservingFPLogic(EVT VT) const override {
736 // FIXME: Is this always true? It should be true for vectors at least.
737 return VT == MVT::f32 || VT == MVT::f64;
738 }
739
supportSplitCSR(MachineFunction * MF)740 bool supportSplitCSR(MachineFunction *MF) const override {
741 return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
742 MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
743 }
744 void initializeSplitCSR(MachineBasicBlock *Entry) const override;
745 void insertCopiesSplitCSR(
746 MachineBasicBlock *Entry,
747 const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
748
  /// AArch64 supports swifterror values (see the calling-convention lowering
  /// for how the error register is handled across calls).
  bool supportSwiftError() const override {
    return true;
  }
752
753 /// Enable aggressive FMA fusion on targets that want it.
754 bool enableAggressiveFMAFusion(EVT VT) const override;
755
756 /// Returns the size of the platform's va_list object.
757 unsigned getVaListSizeInBits(const DataLayout &DL) const override;
758
759 /// Returns true if \p VecTy is a legal interleaved access type. This
760 /// function checks the vector element type and the overall width of the
761 /// vector.
762 bool isLegalInterleavedAccessType(VectorType *VecTy,
763 const DataLayout &DL) const;
764
765 /// Returns the number of interleaved accesses that will be generated when
766 /// lowering accesses of the given type.
767 unsigned getNumInterleavedAccesses(VectorType *VecTy,
768 const DataLayout &DL) const;
769
770 MachineMemOperand::Flags getTargetMMOFlags(
771 const Instruction &I) const override;
772
773 bool functionArgumentNeedsConsecutiveRegisters(Type *Ty,
774 CallingConv::ID CallConv,
775 bool isVarArg) const override;
776 /// Used for exception handling on Win64.
777 bool needsFixedCatchObjects() const override;
778
779 bool fallBackToDAGISel(const Instruction &Inst) const override;
780
781 /// SVE code generation for fixed length vectors does not custom lower
782 /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to
783 /// merge. However, merging them creates a BUILD_VECTOR that is just as
784 /// illegal as the original, thus leading to an infinite legalisation loop.
785 /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
786 /// vector types this override can be removed.
787 bool mergeStoresAfterLegalization(EVT VT) const override;
788
789 private:
790 /// Keep a pointer to the AArch64Subtarget around so that we can
791 /// make the right decision when generating code for different targets.
792 const AArch64Subtarget *Subtarget;
793
794 bool isExtFreeImpl(const Instruction *Ext) const override;
795
796 void addTypeForNEON(MVT VT, MVT PromotedBitwiseVT);
797 void addTypeForFixedLengthSVE(MVT VT);
798 void addDRTypeForNEON(MVT VT);
799 void addQRTypeForNEON(MVT VT);
800
801 SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
802 bool isVarArg,
803 const SmallVectorImpl<ISD::InputArg> &Ins,
804 const SDLoc &DL, SelectionDAG &DAG,
805 SmallVectorImpl<SDValue> &InVals) const override;
806
807 SDValue LowerCall(CallLoweringInfo & /*CLI*/,
808 SmallVectorImpl<SDValue> &InVals) const override;
809
810 SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
811 CallingConv::ID CallConv, bool isVarArg,
812 const SmallVectorImpl<ISD::InputArg> &Ins,
813 const SDLoc &DL, SelectionDAG &DAG,
814 SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
815 SDValue ThisVal) const;
816
817 SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
818 SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
819
820 SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
821 SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;
822
823 SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
824
825 bool isEligibleForTailCallOptimization(
826 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
827 const SmallVectorImpl<ISD::OutputArg> &Outs,
828 const SmallVectorImpl<SDValue> &OutVals,
829 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
830
831 /// Finds the incoming stack arguments which overlap the given fixed stack
832 /// object and incorporates their load into the current chain. This prevents
833 /// an upcoming store from clobbering the stack argument before it's used.
834 SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
835 MachineFrameInfo &MFI, int ClobberedFI) const;
836
837 bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;
838
839 void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
840 SDValue &Chain) const;
841
842 bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
843 bool isVarArg,
844 const SmallVectorImpl<ISD::OutputArg> &Outs,
845 LLVMContext &Context) const override;
846
847 SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
848 const SmallVectorImpl<ISD::OutputArg> &Outs,
849 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
850 SelectionDAG &DAG) const override;
851
852 SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
853 unsigned Flag) const;
854 SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
855 unsigned Flag) const;
856 SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
857 unsigned Flag) const;
858 SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
859 unsigned Flag) const;
860 template <class NodeTy>
861 SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
862 template <class NodeTy>
863 SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
864 template <class NodeTy>
865 SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
866 template <class NodeTy>
867 SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
868 SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
869 SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
870 SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
871 SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
872 SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
873 SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase,
874 const SDLoc &DL, SelectionDAG &DAG) const;
875 SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
876 SelectionDAG &DAG) const;
877 SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
878 SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
879 SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
880 SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
881 SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
882 SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
883 SDValue TVal, SDValue FVal, const SDLoc &dl,
884 SelectionDAG &DAG) const;
885 SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
886 SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
887 SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
888 SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
889 SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
890 SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
891 SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
892 SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
893 SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
894 SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
895 SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
896 SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
897 SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
898 SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
899 SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
900 SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
901 SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
902 SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
903 SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
904 SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
905 SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
906 SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, unsigned NewOp,
907 bool OverrideNEON = false) const;
908 SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
909 SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
910 SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
911 SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
912 SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
913 SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
914 SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
915 SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
916 SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
917 SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
918 SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
919 SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
920 SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
921 SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
922 SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
923 SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
924 SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
925 SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
926 SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
927 SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
928 SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
929 SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
930 SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
931 SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
932 SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
933 SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
934 SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
935 SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
936 SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain,
937 SDValue &Size,
938 SelectionDAG &DAG) const;
939 SDValue LowerSVEStructLoad(unsigned Intrinsic, ArrayRef<SDValue> LoadOps,
940 EVT VT, SelectionDAG &DAG, const SDLoc &DL) const;
941
942 SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
943 SelectionDAG &DAG) const;
944 SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
945 SelectionDAG &DAG) const;
946 SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
947 SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
948 SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
949 SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
950 SelectionDAG &DAG) const;
951 SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;
952 SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;
953 SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
954 SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
955 SelectionDAG &DAG) const;
956
957 SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
958 SmallVectorImpl<SDNode *> &Created) const override;
959 SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
960 int &ExtraSteps, bool &UseOneConst,
961 bool Reciprocal) const override;
962 SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
963 int &ExtraSteps) const override;
964 SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
965 const DenormalMode &Mode) const override;
966 SDValue getSqrtResultForDenormInput(SDValue Operand,
967 SelectionDAG &DAG) const override;
968 unsigned combineRepeatedFPDivisors() const override;
969
970 ConstraintType getConstraintType(StringRef Constraint) const override;
971 Register getRegisterByName(const char* RegName, LLT VT,
972 const MachineFunction &MF) const override;
973
974 /// Examine constraint string and operand type and determine a weight value.
975 /// The operand object must already have been set up with the operand type.
976 ConstraintWeight
977 getSingleConstraintMatchWeight(AsmOperandInfo &info,
978 const char *constraint) const override;
979
980 std::pair<unsigned, const TargetRegisterClass *>
981 getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
982 StringRef Constraint, MVT VT) const override;
983
984 const char *LowerXConstraint(EVT ConstraintVT) const override;
985
986 void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
987 std::vector<SDValue> &Ops,
988 SelectionDAG &DAG) const override;
989
getInlineAsmMemConstraint(StringRef ConstraintCode)990 unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
991 if (ConstraintCode == "Q")
992 return InlineAsm::Constraint_Q;
993 // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
994 // followed by llvm_unreachable so we'll leave them unimplemented in
995 // the backend for now.
996 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
997 }
998
999 bool shouldRemoveExtendFromGSIndex(EVT VT) const override;
1000 bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
1001 bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1002 bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1003 bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
1004 ISD::MemIndexedMode &AM, bool &IsInc,
1005 SelectionDAG &DAG) const;
1006 bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
1007 ISD::MemIndexedMode &AM,
1008 SelectionDAG &DAG) const override;
1009 bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
1010 SDValue &Offset, ISD::MemIndexedMode &AM,
1011 SelectionDAG &DAG) const override;
1012
1013 void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
1014 SelectionDAG &DAG) const override;
1015 void ReplaceExtractSubVectorResults(SDNode *N,
1016 SmallVectorImpl<SDValue> &Results,
1017 SelectionDAG &DAG) const;
1018
1019 bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;
1020
1021 void finalizeLowering(MachineFunction &MF) const override;
1022
1023 bool shouldLocalize(const MachineInstr &MI,
1024 const TargetTransformInfo *TTI) const override;
1025
1026 // Normally SVE is only used for byte size vectors that do not fit within a
1027 // NEON vector. This changes when OverrideNEON is true, allowing SVE to be
1028 // used for 64bit and 128bit vectors as well.
1029 bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;
1030
1031 // With the exception of data-predicate transitions, no instructions are
1032 // required to cast between legal scalable vector types. However:
1033 // 1. Packed and unpacked types have different bit lengths, meaning BITCAST
1034 // is not universally useable.
1035 // 2. Most unpacked integer types are not legal and thus integer extends
1036 // cannot be used to convert between unpacked and packed types.
1037 // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used
1038 // to transition between unpacked and packed types of the same element type,
1039 // with BITCAST used otherwise.
1040 SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;
1041 };
1042
1043 namespace AArch64 {
1044 FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1045 const TargetLibraryInfo *libInfo);
1046 } // end namespace AArch64
1047
1048 } // end namespace llvm
1049
1050 #endif
1051