//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that AArch64 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H

#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/Target/TargetLowering.h"

namespace llvm {

namespace AArch64ISD {

enum {
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
  CALL,         // Function call.

  // Produces the full sequence of instructions for getting the thread pointer
  // offset of a variable into X0, using the TLSDesc model.
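  // On ELF this expands to a fixed call sequence roughly of the following
  // form (illustrative):
  //   adrp x0, :tlsdesc:var
  //   ldr  x1, [x0, #:tlsdesc_lo12:var]
  //   add  x0, x0, :tlsdesc_lo12:var
  //   .tlsdesccall var
  //   blr  x1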
  TLSDESC_CALLSEQ,
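
  // ADRP and ADDlow are typically emitted as a pair to materialize the full
  // address of a symbol, e.g. (illustrative):
  //   adrp x0, var
  //   add  x0, x0, :lo12:var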
  ADRP,     // Page address of a TargetGlobalAddress operand.
  ADDlow,   // Add the low 12 bits of a TargetGlobalAddress operand.
  LOADgot,  // Load from automatically generated descriptor (e.g. Global
            // Offset Table, TLS record).
  RET_FLAG, // Return with a flag operand. Operand 0 is the chain operand.
  BRCOND,   // Conditional branch instruction; "b.cond".
  CSEL,
  FCSEL, // Conditional move instruction.
  CSINV, // Conditional select invert.
  CSNEG, // Conditional select negate.
  CSINC, // Conditional select increment.
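  // Illustrative semantics for the conditional selects (see the ARMv8 ARM
  // for the authoritative definitions):
  //   CSINV d, a, b, cc  =>  d = cc ? a : ~b
  //   CSNEG d, a, b, cc  =>  d = cc ? a : -b
  //   CSINC d, a, b, cc  =>  d = cc ? a : b + 1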

  // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
  // ELF.
  THREAD_POINTER,
  ADC,
  SBC, // adc, sbc instructions

  // Arithmetic instructions which write flags.
  ADDS,
  SUBS,
  ADCS,
  SBCS,
  ANDS,

  // Floating point comparison
  FCMP,

  // Floating point max and min instructions.
  FMAX,
  FMIN,

  // Scalar extract
  EXTR,
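  // (Illustratively, EXTR d, a, b, #lsb yields the register-width field
  // starting at bit #lsb of the double-width concatenation a:b.)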

  // Scalar-to-vector duplication
  DUP,
  DUPLANE8,
  DUPLANE16,
  DUPLANE32,
  DUPLANE64,

  // Vector immediate moves
  MOVI,
  MOVIshift,
  MOVIedit,
  MOVImsl,
  FMOV,
  MVNIshift,
  MVNImsl,

  // Vector immediate ops
  BICi,
  ORRi,

  // Vector bit select: similar to ISD::VSELECT but not all bits within an
  // element must be identical.
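  // Conceptually, for operands (mask, a, b), each result bit is
  // (mask & a) | (~mask & b).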
  BSL,

  // Vector arithmetic negation
  NEG,

  // Vector shuffles
  ZIP1,
  ZIP2,
  UZP1,
  UZP2,
  TRN1,
  TRN2,
  REV16,
  REV32,
  REV64,
  EXT,

  // Vector shift by scalar
  VSHL,
  VLSHR,
  VASHR,

  // Vector saturating and rounding shifts by immediate
  SQSHL_I,
  UQSHL_I,
  SQSHLU_I,
  SRSHR_I,
  URSHR_I,

  // Vector comparisons
  CMEQ,
  CMGE,
  CMGT,
  CMHI,
  CMHS,
  FCMEQ,
  FCMGE,
  FCMGT,

  // Vector zero comparisons
  CMEQz,
  CMGEz,
  CMGTz,
  CMLEz,
  CMLTz,
  FCMEQz,
  FCMGEz,
  FCMGTz,
  FCMLEz,
  FCMLTz,

  // Vector bitwise negation
  NOT,

  // Vector bitwise selection
  BIT,

  // Compare-and-branch
  CBZ,
  CBNZ,
  TBZ,
  TBNZ,

  // Tail calls
  TC_RETURN,

  // Custom prefetch handling
  PREFETCH,

  // {s|u}int to FP within a FP register.
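  // (Roughly, an scvtf/ucvtf whose integer source already lives in a SIMD/FP
  // register, avoiding a GPR-to-FPR transfer.)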
  SITOF,
  UITOF,

  /// Natural vector cast. ISD::BITCAST is not natural in the big-endian
  /// world w.r.t. vectors, which causes additional REV instructions to be
  /// generated to compensate for the byte-swapping. But sometimes we do
  /// need to re-interpret the data in SIMD vector registers in big-endian
  /// mode without emitting such REV instructions.
  NVCAST,

  SMULL,
  UMULL,

  // NEON Load/Store with post-increment base updates
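  // For example, LD2post corresponds to an instruction along the lines of
  //   ld2 { v0.4s, v1.4s }, [x0], #32
  // where the base register is written back as part of the load
  // (illustrative).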
  LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
  LD3post,
  LD4post,
  ST2post,
  ST3post,
  ST4post,
  LD1x2post,
  LD1x3post,
  LD1x4post,
  ST1x2post,
  ST1x3post,
  ST1x4post,
  LD1DUPpost,
  LD2DUPpost,
  LD3DUPpost,
  LD4DUPpost,
  LD1LANEpost,
  LD2LANEpost,
  LD3LANEpost,
  LD4LANEpost,
  ST2LANEpost,
  ST3LANEpost,
  ST4LANEpost
};

} // end namespace AArch64ISD

class AArch64Subtarget;
class AArch64TargetMachine;

class AArch64TargetLowering : public TargetLowering {
  bool RequireStrictAlign;

public:
  explicit AArch64TargetLowering(const TargetMachine &TM);

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;

  /// computeKnownBitsForTargetNode - Determine which of the bits specified in
  /// Mask are known to be either zero or one and return them in the
  /// KnownZero/KnownOne bitsets.
  void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero,
                                     APInt &KnownOne, const SelectionDAG &DAG,
                                     unsigned Depth = 0) const override;

  MVT getScalarShiftAmountTy(EVT LHSTy) const override;

  /// allowsMisalignedMemoryAccesses - Returns true if the target allows
  /// unaligned memory accesses of the specified type.
  bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace = 0,
                                      unsigned Align = 1,
                                      bool *Fast = nullptr) const override {
    if (RequireStrictAlign)
      return false;
    // FIXME: True for Cyclone, but not necessarily for others.
    if (Fast)
      *Fast = true;
    return true;
  }

  /// LowerOperation - Provide custom lowering hooks for some operations.
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

  const char *getTargetNodeName(unsigned Opcode) const override;

  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

  /// getFunctionAlignment - Return the Log2 alignment of this function.
  unsigned getFunctionAlignment(const Function *F) const;

  /// getMaximalGlobalOffset - Returns the maximal possible offset which can
  /// be used for loads / stores from the global.
  unsigned getMaximalGlobalOffset() const override;

  /// Returns true if a cast between SrcAS and DestAS is a noop.
  bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
    // Addrspacecasts are always noops.
    return true;
  }

  /// createFastISel - This method returns a target specific FastISel object,
  /// or null if the target does not support "fast" ISel.
  FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                           const TargetLibraryInfo *libInfo) const override;

  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;

  bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;

  /// isShuffleMaskLegal - Return true if the given shuffle mask can be
  /// codegen'd directly, or if it should be stack expanded.
  bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const override;

  /// getSetCCResultType - Return the ISD::SETCC ValueType
  EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override;

  SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;

  MachineBasicBlock *EmitF128CSEL(MachineInstr *MI,
                                  MachineBasicBlock *BB) const;

  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr *MI,
                              MachineBasicBlock *MBB) const override;

  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                          unsigned Intrinsic) const override;

  bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
  bool isTruncateFree(EVT VT1, EVT VT2) const override;

  bool isZExtFree(Type *Ty1, Type *Ty2) const override;
  bool isZExtFree(EVT VT1, EVT VT2) const override;
  bool isZExtFree(SDValue Val, EVT VT2) const override;

  bool hasPairedLoad(Type *LoadedType,
                     unsigned &RequiredAlignment) const override;
  bool hasPairedLoad(EVT LoadedType,
                     unsigned &RequiredAlignment) const override;

  bool isLegalAddImmediate(int64_t) const override;
  bool isLegalICmpImmediate(int64_t) const override;

  EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
                          bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
                          MachineFunction &MF) const override;

  /// isLegalAddressingMode - Return true if the addressing mode represented
  /// by AM is legal for this target, for a load/store of the specified type.
  bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;

  /// \brief Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  int getScalingFactorCost(const AddrMode &AM, Type *Ty) const override;

  /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
  /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
  /// expanded to FMAs when this method returns true, otherwise fmuladd is
  /// expanded to fmul + fadd.
  bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;

  const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;

  /// \brief Returns false if N is a bit extraction pattern of (X >> C) & Mask.
  bool isDesirableToCommuteWithShift(const SDNode *N) const override;

  /// \brief Returns true if it is beneficial to convert a load of a constant
  /// to just the constant itself.
  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                         Type *Ty) const override;

  bool hasLoadLinkedStoreConditional() const override;
  Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
                        AtomicOrdering Ord) const override;
  Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
                              Value *Addr, AtomicOrdering Ord) const override;

  bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
  bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
  bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

  bool useLoadStackGuardNode() const override;
  TargetLoweringBase::LegalizeTypeAction
  getPreferredVectorAction(EVT VT) const override;

private:
  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

  void addTypeForNEON(EVT VT, EVT PromotedBitwiseVT);
  void addDRTypeForNEON(MVT VT);
  void addQRTypeForNEON(MVT VT);

  SDValue
  LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                       const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL,
                       SelectionDAG &DAG,
                       SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerCall(CallLoweringInfo & /*CLI*/,
                    SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                          CallingConv::ID CallConv, bool isVarArg,
                          const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL,
                          SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
                          bool isThisReturn, SDValue ThisVal) const;

  bool isEligibleForTailCallOptimization(
      SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
      bool isCalleeStructRet, bool isCallerStructRet,
      const SmallVectorImpl<ISD::OutputArg> &Outs,
      const SmallVectorImpl<SDValue> &OutVals,
      const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;

  /// Finds the incoming stack arguments which overlap the given fixed stack
  /// object and incorporates their load into the current chain. This prevents
  /// an upcoming store from clobbering the stack argument before it's used.
  SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
                              MachineFrameInfo *MFI, int ClobberedFI) const;

  bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;

  bool IsTailCallConvention(CallingConv::ID CallCC) const;

  void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, SDLoc DL,
                           SDValue &Chain) const;

  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                      bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      LLVMContext &Context) const override;

  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals, SDLoc DL,
                      SelectionDAG &DAG) const override;

  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, SDLoc DL,
                                 SelectionDAG &DAG) const;
  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG,
                        RTLIB::Libcall Call) const;
  SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorAND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;

  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        std::vector<SDNode *> *Created) const override;
  bool combineRepeatedFPDivisors(unsigned NumUsers) const override;

  ConstraintType
  getConstraintType(const std::string &Constraint) const override;
  unsigned getRegisterByName(const char* RegName, EVT VT) const override;

  /// Examine constraint string and operand type and determine a weight value.
  /// The operand object must already have been set up with the operand type.
  ConstraintWeight
  getSingleConstraintMatchWeight(AsmOperandInfo &info,
                                 const char *constraint) const override;

  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const std::string &Constraint,
                               MVT VT) const override;
  void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;

  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
  bool mayBeEmittedAsTailCall(CallInst *CI) const override;
  bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
                              ISD::MemIndexedMode &AM, bool &IsInc,
                              SelectionDAG &DAG) const;
  bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
                                 ISD::MemIndexedMode &AM,
                                 SelectionDAG &DAG) const override;
  bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                                  SDValue &Offset, ISD::MemIndexedMode &AM,
                                  SelectionDAG &DAG) const override;

  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;

  bool functionArgumentNeedsConsecutiveRegisters(Type *Ty,
                                                 CallingConv::ID CallConv,
                                                 bool isVarArg) const override;
};

namespace AArch64 {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                         const TargetLibraryInfo *libInfo);
} // end namespace AArch64

} // end namespace llvm

#endif