1 //===- AArch64FastISel.cpp - AArch64 FastISel implementation --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the AArch64-specific support for the FastISel class. Some
10 // of the target-specific code is generated by tablegen in the file
11 // AArch64GenFastISel.inc, which is #included here.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AArch64.h"
16 #include "AArch64CallingConvention.h"
17 #include "AArch64MachineFunctionInfo.h"
18 #include "AArch64RegisterInfo.h"
19 #include "AArch64Subtarget.h"
20 #include "MCTargetDesc/AArch64AddressingModes.h"
21 #include "Utils/AArch64BaseInfo.h"
22 #include "llvm/ADT/APFloat.h"
23 #include "llvm/ADT/APInt.h"
24 #include "llvm/ADT/DenseMap.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/Analysis/BranchProbabilityInfo.h"
27 #include "llvm/CodeGen/CallingConvLower.h"
28 #include "llvm/CodeGen/FastISel.h"
29 #include "llvm/CodeGen/FunctionLoweringInfo.h"
30 #include "llvm/CodeGen/ISDOpcodes.h"
31 #include "llvm/CodeGen/MachineBasicBlock.h"
32 #include "llvm/CodeGen/MachineConstantPool.h"
33 #include "llvm/CodeGen/MachineFrameInfo.h"
34 #include "llvm/CodeGen/MachineInstr.h"
35 #include "llvm/CodeGen/MachineInstrBuilder.h"
36 #include "llvm/CodeGen/MachineMemOperand.h"
37 #include "llvm/CodeGen/MachineRegisterInfo.h"
38 #include "llvm/CodeGen/MachineValueType.h"
39 #include "llvm/CodeGen/RuntimeLibcalls.h"
40 #include "llvm/CodeGen/ValueTypes.h"
41 #include "llvm/IR/Argument.h"
42 #include "llvm/IR/Attributes.h"
43 #include "llvm/IR/BasicBlock.h"
44 #include "llvm/IR/CallingConv.h"
45 #include "llvm/IR/Constant.h"
46 #include "llvm/IR/Constants.h"
47 #include "llvm/IR/DataLayout.h"
48 #include "llvm/IR/DerivedTypes.h"
49 #include "llvm/IR/Function.h"
50 #include "llvm/IR/GetElementPtrTypeIterator.h"
51 #include "llvm/IR/GlobalValue.h"
52 #include "llvm/IR/InstrTypes.h"
53 #include "llvm/IR/Instruction.h"
54 #include "llvm/IR/Instructions.h"
55 #include "llvm/IR/IntrinsicInst.h"
56 #include "llvm/IR/Intrinsics.h"
57 #include "llvm/IR/IntrinsicsAArch64.h"
58 #include "llvm/IR/Operator.h"
59 #include "llvm/IR/Type.h"
60 #include "llvm/IR/User.h"
61 #include "llvm/IR/Value.h"
62 #include "llvm/MC/MCInstrDesc.h"
63 #include "llvm/MC/MCRegisterInfo.h"
64 #include "llvm/MC/MCSymbol.h"
65 #include "llvm/Support/AtomicOrdering.h"
66 #include "llvm/Support/Casting.h"
67 #include "llvm/Support/CodeGen.h"
68 #include "llvm/Support/Compiler.h"
69 #include "llvm/Support/ErrorHandling.h"
70 #include "llvm/Support/MathExtras.h"
71 #include <algorithm>
72 #include <cassert>
73 #include <cstdint>
74 #include <iterator>
75 #include <utility>
76 
77 using namespace llvm;
78 
79 namespace {
80 
81 class AArch64FastISel final : public FastISel {
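  // Address describes the addressing modes this FastISel can fold for AArch64
  // loads and stores: a base (either a register or a frame index), an optional
  // offset register with a shift/extend, an immediate offset, and an optional
  // global value for symbol-relative accesses.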
82   class Address {
83   public:
84     using BaseKind = enum {
85       RegBase,
86       FrameIndexBase
87     };
88 
89   private:
90     BaseKind Kind = RegBase;
91     AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
92     union {
93       unsigned Reg;
94       int FI;
95     } Base;
96     unsigned OffsetReg = 0;
97     unsigned Shift = 0;
98     int64_t Offset = 0;
99     const GlobalValue *GV = nullptr;
100 
101   public:
102     Address() { Base.Reg = 0; }
103 
104     void setKind(BaseKind K) { Kind = K; }
105     BaseKind getKind() const { return Kind; }
106     void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
107     AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
108     bool isRegBase() const { return Kind == RegBase; }
109     bool isFIBase() const { return Kind == FrameIndexBase; }
110 
111     void setReg(unsigned Reg) {
112       assert(isRegBase() && "Invalid base register access!");
113       Base.Reg = Reg;
114     }
115 
116     unsigned getReg() const {
117       assert(isRegBase() && "Invalid base register access!");
118       return Base.Reg;
119     }
120 
121     void setOffsetReg(unsigned Reg) {
122       OffsetReg = Reg;
123     }
124 
125     unsigned getOffsetReg() const {
126       return OffsetReg;
127     }
128 
129     void setFI(unsigned FI) {
130       assert(isFIBase() && "Invalid base frame index access!");
131       Base.FI = FI;
132     }
133 
134     unsigned getFI() const {
135       assert(isFIBase() && "Invalid base frame index access!");
136       return Base.FI;
137     }
138 
139     void setOffset(int64_t O) { Offset = O; }
140     int64_t getOffset() { return Offset; }
141     void setShift(unsigned S) { Shift = S; }
142     unsigned getShift() { return Shift; }
143 
144     void setGlobalValue(const GlobalValue *G) { GV = G; }
145     const GlobalValue *getGlobalValue() { return GV; }
146   };
147 
148   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
149   /// make the right decision when generating code for different targets.
150   const AArch64Subtarget *Subtarget;
151   LLVMContext *Context;
152 
153   bool fastLowerArguments() override;
154   bool fastLowerCall(CallLoweringInfo &CLI) override;
155   bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
156 
157 private:
158   // Selection routines.
159   bool selectAddSub(const Instruction *I);
160   bool selectLogicalOp(const Instruction *I);
161   bool selectLoad(const Instruction *I);
162   bool selectStore(const Instruction *I);
163   bool selectBranch(const Instruction *I);
164   bool selectIndirectBr(const Instruction *I);
165   bool selectCmp(const Instruction *I);
166   bool selectSelect(const Instruction *I);
167   bool selectFPExt(const Instruction *I);
168   bool selectFPTrunc(const Instruction *I);
169   bool selectFPToInt(const Instruction *I, bool Signed);
170   bool selectIntToFP(const Instruction *I, bool Signed);
171   bool selectRem(const Instruction *I, unsigned ISDOpcode);
172   bool selectRet(const Instruction *I);
173   bool selectTrunc(const Instruction *I);
174   bool selectIntExt(const Instruction *I);
175   bool selectMul(const Instruction *I);
176   bool selectShift(const Instruction *I);
177   bool selectBitCast(const Instruction *I);
178   bool selectFRem(const Instruction *I);
179   bool selectSDiv(const Instruction *I);
180   bool selectGetElementPtr(const Instruction *I);
181   bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
182 
183   // Utility helper routines.
184   bool isTypeLegal(Type *Ty, MVT &VT);
185   bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
186   bool isValueAvailable(const Value *V) const;
187   bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
188   bool computeCallAddress(const Value *V, Address &Addr);
189   bool simplifyAddress(Address &Addr, MVT VT);
190   void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
191                             MachineMemOperand::Flags Flags,
192                             unsigned ScaleFactor, MachineMemOperand *MMO);
193   bool isMemCpySmall(uint64_t Len, MaybeAlign Alignment);
194   bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
195                           MaybeAlign Alignment);
196   bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
197                          const Value *Cond);
198   bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
199   bool optimizeSelect(const SelectInst *SI);
200   unsigned getRegForGEPIndex(const Value *Idx);
201 
202   // Emit helper routines.
203   unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
204                       const Value *RHS, bool SetFlags = false,
205                       bool WantResult = true,  bool IsZExt = false);
206   unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
207                          unsigned RHSReg, bool SetFlags = false,
208                          bool WantResult = true);
209   unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
210                          uint64_t Imm, bool SetFlags = false,
211                          bool WantResult = true);
212   unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
213                          unsigned RHSReg, AArch64_AM::ShiftExtendType ShiftType,
214                          uint64_t ShiftImm, bool SetFlags = false,
215                          bool WantResult = true);
216   unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
217                          unsigned RHSReg, AArch64_AM::ShiftExtendType ExtType,
218                          uint64_t ShiftImm, bool SetFlags = false,
219                          bool WantResult = true);
220 
221   // Emit functions.
222   bool emitCompareAndBranch(const BranchInst *BI);
223   bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
224   bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
225   bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
226   bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
227   unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
228                     MachineMemOperand *MMO = nullptr);
229   bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
230                  MachineMemOperand *MMO = nullptr);
231   bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
232                         MachineMemOperand *MMO = nullptr);
233   unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
234   unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
235   unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
236                    bool SetFlags = false, bool WantResult = true,
237                    bool IsZExt = false);
238   unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm);
239   unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
240                    bool SetFlags = false, bool WantResult = true,
241                    bool IsZExt = false);
242   unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
243                        bool WantResult = true);
244   unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
245                        AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
246                        bool WantResult = true);
247   unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
248                          const Value *RHS);
249   unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
250                             uint64_t Imm);
251   unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
252                             unsigned RHSReg, uint64_t ShiftImm);
253   unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
254   unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1);
255   unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
256   unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
257   unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
258   unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
259                       bool IsZExt = true);
260   unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
261   unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
262                       bool IsZExt = true);
263   unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
264   unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
265                       bool IsZExt = false);
266 
267   unsigned materializeInt(const ConstantInt *CI, MVT VT);
268   unsigned materializeFP(const ConstantFP *CFP, MVT VT);
269   unsigned materializeGV(const GlobalValue *GV);
270 
271   // Call handling routines.
272 private:
273   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
274   bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
275                        unsigned &NumBytes);
276   bool finishCall(CallLoweringInfo &CLI, unsigned NumBytes);
277 
278 public:
279   // Backend specific FastISel code.
280   unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
281   unsigned fastMaterializeConstant(const Constant *C) override;
282   unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
283 
284   explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
285                            const TargetLibraryInfo *LibInfo)
286       : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
287     Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>();
288     Context = &FuncInfo.Fn->getContext();
289   }
290 
291   bool fastSelectInstruction(const Instruction *I) override;
292 
293 #include "AArch64GenFastISel.inc"
294 };
295 
296 } // end anonymous namespace
297 
298 /// Check if the sign-/zero-extend will be a noop.
299 static bool isIntExtFree(const Instruction *I) {
300   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
301          "Unexpected integer extend instruction.");
302   assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
303          "Unexpected value type.");
304   bool IsZExt = isa<ZExtInst>(I);
305 
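  // A one-use extended load is free because the extend can usually be folded
  // into the load itself (see optimizeIntExtLoad).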
306   if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
307     if (LI->hasOneUse())
308       return true;
309 
310   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
311     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
312       return true;
313 
314   return false;
315 }
316 
317 /// Determine the implicit scale factor that is applied by a memory
318 /// operation for a given value type.
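/// For example, an i32 or f32 access returns 4: the scaled load/store forms
/// encode their immediate as a multiple of the access size, so a byte offset
/// of 16 becomes an encoded immediate of 4.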
319 static unsigned getImplicitScaleFactor(MVT VT) {
320   switch (VT.SimpleTy) {
321   default:
322     return 0;    // invalid
323   case MVT::i1:  // fall-through
324   case MVT::i8:
325     return 1;
326   case MVT::i16:
327     return 2;
328   case MVT::i32: // fall-through
329   case MVT::f32:
330     return 4;
331   case MVT::i64: // fall-through
332   case MVT::f64:
333     return 8;
334   }
335 }
336 
337 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
338   if (CC == CallingConv::GHC)
339     return CC_AArch64_GHC;
340   if (CC == CallingConv::CFGuard_Check)
341     return CC_AArch64_Win64_CFGuard_Check;
342   if (Subtarget->isTargetDarwin())
343     return CC_AArch64_DarwinPCS;
344   if (Subtarget->isTargetWindows())
345     return CC_AArch64_Win64PCS;
346   return CC_AArch64_AAPCS;
347 }
348 
349 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
350   assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
351          "Alloca should always return a pointer.");
352 
353   // Don't handle dynamic allocas.
354   if (!FuncInfo.StaticAllocaMap.count(AI))
355     return 0;
356 
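  // Static allocas are materialized as a frame index plus zero via ADDXri; the
  // frame index is rewritten to an SP/FP-relative offset later, during frame
  // index elimination.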
357   DenseMap<const AllocaInst *, int>::iterator SI =
358       FuncInfo.StaticAllocaMap.find(AI);
359 
360   if (SI != FuncInfo.StaticAllocaMap.end()) {
361     Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
362     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
363             ResultReg)
364         .addFrameIndex(SI->second)
365         .addImm(0)
366         .addImm(0);
367     return ResultReg;
368   }
369 
370   return 0;
371 }
372 
373 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
374   if (VT > MVT::i64)
375     return 0;
376 
377   if (!CI->isZero())
378     return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
379 
380   // Create a copy from the zero register to materialize a "0" value.
381   const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
382                                                    : &AArch64::GPR32RegClass;
383   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
384   Register ResultReg = createResultReg(RC);
385   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
386           ResultReg).addReg(ZeroReg, getKillRegState(true));
387   return ResultReg;
388 }
389 
390 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
391   // Positive zero (+0.0) has to be materialized with a fmov from the zero
392   // register, because the immediate version of fmov cannot encode zero.
393   if (CFP->isNullValue())
394     return fastMaterializeFloatZero(CFP);
395 
396   if (VT != MVT::f32 && VT != MVT::f64)
397     return 0;
398 
399   const APFloat Val = CFP->getValueAPF();
400   bool Is64Bit = (VT == MVT::f64);
401   // This checks to see if we can use FMOV instructions to materialize
402   // a constant; otherwise we have to materialize via the constant pool.
403   int Imm =
404       Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
405   if (Imm != -1) {
406     unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
407     return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
408   }
409 
410   // For the large code model materialize the FP constant in code.
411   if (TM.getCodeModel() == CodeModel::Large) {
412     unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
413     const TargetRegisterClass *RC = Is64Bit ?
414         &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
415 
416     Register TmpReg = createResultReg(RC);
417     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc1), TmpReg)
418         .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
419 
420     Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
421     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
422             TII.get(TargetOpcode::COPY), ResultReg)
423         .addReg(TmpReg, getKillRegState(true));
424 
425     return ResultReg;
426   }
427 
428   // Materialize via constant pool.  MachineConstantPool wants an explicit
429   // alignment.
430   Align Alignment = DL.getPrefTypeAlign(CFP->getType());
431 
432   unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
433   Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
434   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
435           ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
436 
437   unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
438   Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
439   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
440       .addReg(ADRPReg)
441       .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
442   return ResultReg;
443 }
444 
445 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
446   // We can't handle thread-local variables quickly yet.
447   if (GV->isThreadLocal())
448     return 0;
449 
450   // MachO still uses GOT for large code-model accesses, but ELF requires
451   // movz/movk sequences, which FastISel doesn't handle yet.
452   if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
453     return 0;
454 
455   unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
456 
457   EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
458   if (!DestEVT.isSimple())
459     return 0;
460 
461   Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
462   unsigned ResultReg;
463 
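  // ADRP materializes the 4KiB page address of the target; the low 12 bits
  // come from the lo12-style relocation on the follow-up instruction, either a
  // GOT load (ADRP + LDR) or a direct add (ADRP + ADD).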
464   if (OpFlags & AArch64II::MO_GOT) {
465     // ADRP + LDRX
466     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
467             ADRPReg)
468         .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
469 
470     unsigned LdrOpc;
471     if (Subtarget->isTargetILP32()) {
472       ResultReg = createResultReg(&AArch64::GPR32RegClass);
473       LdrOpc = AArch64::LDRWui;
474     } else {
475       ResultReg = createResultReg(&AArch64::GPR64RegClass);
476       LdrOpc = AArch64::LDRXui;
477     }
478     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(LdrOpc),
479             ResultReg)
480       .addReg(ADRPReg)
481       .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
482                         AArch64II::MO_NC | OpFlags);
483     if (!Subtarget->isTargetILP32())
484       return ResultReg;
485 
486     // LDRWui produces a 32-bit register, but pointers in registers are
487     // 64 bits wide, so we must extend the result on ILP32.
488     Register Result64 = createResultReg(&AArch64::GPR64RegClass);
489     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
490             TII.get(TargetOpcode::SUBREG_TO_REG))
491         .addDef(Result64)
492         .addImm(0)
493         .addReg(ResultReg, RegState::Kill)
494         .addImm(AArch64::sub_32);
495     return Result64;
496   } else {
497     // ADRP + ADDX
498     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
499             ADRPReg)
500         .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
501 
502     if (OpFlags & AArch64II::MO_TAGGED) {
503       // MO_TAGGED on the page indicates a tagged address. Set the tag now.
504       // We do so by creating a MOVK that sets bits 48-63 of the register to
505       // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
506       // the small code model so we can assume a binary size of <= 4GB, which
507       // makes the untagged PC relative offset positive. The binary must also be
508       // loaded into address range [0, 2^48). Both of these properties need to
509       // be ensured at runtime when using tagged addresses.
510       //
511       // TODO: There is duplicate logic in AArch64ExpandPseudoInsts.cpp that
512       // also uses BuildMI for making an ADRP (+ MOVK) + ADD, but the operands
513       // are not exactly 1:1 with FastISel so we cannot easily abstract this
514       // out. At some point, it would be nice to find a way to not have this
515       // duplicate code.
516       unsigned DstReg = createResultReg(&AArch64::GPR64commonRegClass);
517       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::MOVKXi),
518               DstReg)
519           .addReg(ADRPReg)
520           .addGlobalAddress(GV, /*Offset=*/0x100000000,
521                             AArch64II::MO_PREL | AArch64II::MO_G3)
522           .addImm(48);
523       ADRPReg = DstReg;
524     }
525 
526     ResultReg = createResultReg(&AArch64::GPR64spRegClass);
527     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
528             ResultReg)
529         .addReg(ADRPReg)
530         .addGlobalAddress(GV, 0,
531                           AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
532         .addImm(0);
533   }
534   return ResultReg;
535 }
536 
537 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
538   EVT CEVT = TLI.getValueType(DL, C->getType(), true);
539 
540   // Only handle simple types.
541   if (!CEVT.isSimple())
542     return 0;
543   MVT VT = CEVT.getSimpleVT();
544   // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
545   // 'null' pointers need somewhat special treatment.
546   if (isa<ConstantPointerNull>(C)) {
547     assert(VT == MVT::i64 && "Expected 64-bit pointers");
548     return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
549   }
550 
551   if (const auto *CI = dyn_cast<ConstantInt>(C))
552     return materializeInt(CI, VT);
553   else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
554     return materializeFP(CFP, VT);
555   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
556     return materializeGV(GV);
557 
558   return 0;
559 }
560 
561 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
562   assert(CFP->isNullValue() &&
563          "Floating-point constant is not a positive zero.");
564   MVT VT;
565   if (!isTypeLegal(CFP->getType(), VT))
566     return 0;
567 
568   if (VT != MVT::f32 && VT != MVT::f64)
569     return 0;
570 
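  // +0.0 is materialized by copying the integer zero register into an FP
  // register (FMOVWSr/FMOVXDr); no load or immediate is needed.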
571   bool Is64Bit = (VT == MVT::f64);
572   unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
573   unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
574   return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);
575 }
576 
577 /// Check if the multiply is by a power-of-2 constant.
578 static bool isMulPowOf2(const Value *I) {
579   if (const auto *MI = dyn_cast<MulOperator>(I)) {
580     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
581       if (C->getValue().isPowerOf2())
582         return true;
583     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
584       if (C->getValue().isPowerOf2())
585         return true;
586   }
587   return false;
588 }
589 
590 // Computes the address to get to an object.
591 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
592 {
593   const User *U = nullptr;
594   unsigned Opcode = Instruction::UserOp1;
595   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
596     // Don't walk into other basic blocks unless the object is an alloca from
597     // another block, otherwise it may not have a virtual register assigned.
598     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
599         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
600       Opcode = I->getOpcode();
601       U = I;
602     }
603   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
604     Opcode = C->getOpcode();
605     U = C;
606   }
607 
608   if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
609     if (Ty->getAddressSpace() > 255)
610       // Fast instruction selection doesn't support the special
611       // address spaces.
612       return false;
613 
614   switch (Opcode) {
615   default:
616     break;
617   case Instruction::BitCast:
618     // Look through bitcasts.
619     return computeAddress(U->getOperand(0), Addr, Ty);
620 
621   case Instruction::IntToPtr:
622     // Look past no-op inttoptrs.
623     if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
624         TLI.getPointerTy(DL))
625       return computeAddress(U->getOperand(0), Addr, Ty);
626     break;
627 
628   case Instruction::PtrToInt:
629     // Look past no-op ptrtoints.
630     if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
631       return computeAddress(U->getOperand(0), Addr, Ty);
632     break;
633 
634   case Instruction::GetElementPtr: {
635     Address SavedAddr = Addr;
636     uint64_t TmpOffset = Addr.getOffset();
637 
638     // Iterate through the GEP folding the constants into offsets where
639     // we can.
640     for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
641          GTI != E; ++GTI) {
642       const Value *Op = GTI.getOperand();
643       if (StructType *STy = GTI.getStructTypeOrNull()) {
644         const StructLayout *SL = DL.getStructLayout(STy);
645         unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
646         TmpOffset += SL->getElementOffset(Idx);
647       } else {
648         uint64_t S = GTI.getSequentialElementStride(DL);
649         while (true) {
650           if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
651             // Constant-offset addressing.
652             TmpOffset += CI->getSExtValue() * S;
653             break;
654           }
655           if (canFoldAddIntoGEP(U, Op)) {
656             // A compatible add with a constant operand. Fold the constant.
657             ConstantInt *CI =
658                 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
659             TmpOffset += CI->getSExtValue() * S;
660             // Iterate on the other operand.
661             Op = cast<AddOperator>(Op)->getOperand(0);
662             continue;
663           }
664           // Unsupported
665           goto unsupported_gep;
666         }
667       }
668     }
669 
670     // Try to grab the base operand now.
671     Addr.setOffset(TmpOffset);
672     if (computeAddress(U->getOperand(0), Addr, Ty))
673       return true;
674 
675     // We failed, restore everything and try the other options.
676     Addr = SavedAddr;
677 
678   unsupported_gep:
679     break;
680   }
681   case Instruction::Alloca: {
682     const AllocaInst *AI = cast<AllocaInst>(Obj);
683     DenseMap<const AllocaInst *, int>::iterator SI =
684         FuncInfo.StaticAllocaMap.find(AI);
685     if (SI != FuncInfo.StaticAllocaMap.end()) {
686       Addr.setKind(Address::FrameIndexBase);
687       Addr.setFI(SI->second);
688       return true;
689     }
690     break;
691   }
692   case Instruction::Add: {
693     // Adds of constants are common and easy enough.
694     const Value *LHS = U->getOperand(0);
695     const Value *RHS = U->getOperand(1);
696 
697     if (isa<ConstantInt>(LHS))
698       std::swap(LHS, RHS);
699 
700     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
701       Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
702       return computeAddress(LHS, Addr, Ty);
703     }
704 
705     Address Backup = Addr;
706     if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
707       return true;
708     Addr = Backup;
709 
710     break;
711   }
712   case Instruction::Sub: {
713     // Subs of constants are common and easy enough.
714     const Value *LHS = U->getOperand(0);
715     const Value *RHS = U->getOperand(1);
716 
717     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
718       Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
719       return computeAddress(LHS, Addr, Ty);
720     }
721     break;
722   }
723   case Instruction::Shl: {
724     if (Addr.getOffsetReg())
725       break;
726 
727     const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
728     if (!CI)
729       break;
730 
731     unsigned Val = CI->getZExtValue();
732     if (Val < 1 || Val > 3)
733       break;
734 
735     uint64_t NumBytes = 0;
736     if (Ty && Ty->isSized()) {
737       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
738       NumBytes = NumBits / 8;
739       if (!isPowerOf2_64(NumBits))
740         NumBytes = 0;
741     }
742 
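    // The shift is only foldable into the addressing mode when it matches the
    // access size exactly, e.g. [x0, x1, lsl #3] for an 8-byte access.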
743     if (NumBytes != (1ULL << Val))
744       break;
745 
746     Addr.setShift(Val);
747     Addr.setExtendType(AArch64_AM::LSL);
748 
749     const Value *Src = U->getOperand(0);
750     if (const auto *I = dyn_cast<Instruction>(Src)) {
751       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
752         // Fold the zext or sext when it won't become a noop.
753         if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
754           if (!isIntExtFree(ZE) &&
755               ZE->getOperand(0)->getType()->isIntegerTy(32)) {
756             Addr.setExtendType(AArch64_AM::UXTW);
757             Src = ZE->getOperand(0);
758           }
759         } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
760           if (!isIntExtFree(SE) &&
761               SE->getOperand(0)->getType()->isIntegerTy(32)) {
762             Addr.setExtendType(AArch64_AM::SXTW);
763             Src = SE->getOperand(0);
764           }
765         }
766       }
767     }
768 
769     if (const auto *AI = dyn_cast<BinaryOperator>(Src))
770       if (AI->getOpcode() == Instruction::And) {
771         const Value *LHS = AI->getOperand(0);
772         const Value *RHS = AI->getOperand(1);
773 
774         if (const auto *C = dyn_cast<ConstantInt>(LHS))
775           if (C->getValue() == 0xffffffff)
776             std::swap(LHS, RHS);
777 
778         if (const auto *C = dyn_cast<ConstantInt>(RHS))
779           if (C->getValue() == 0xffffffff) {
780             Addr.setExtendType(AArch64_AM::UXTW);
781             Register Reg = getRegForValue(LHS);
782             if (!Reg)
783               return false;
784             Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
785             Addr.setOffsetReg(Reg);
786             return true;
787           }
788       }
789 
790     Register Reg = getRegForValue(Src);
791     if (!Reg)
792       return false;
793     Addr.setOffsetReg(Reg);
794     return true;
795   }
796   case Instruction::Mul: {
797     if (Addr.getOffsetReg())
798       break;
799 
800     if (!isMulPowOf2(U))
801       break;
802 
803     const Value *LHS = U->getOperand(0);
804     const Value *RHS = U->getOperand(1);
805 
806     // Canonicalize power-of-2 value to the RHS.
807     if (const auto *C = dyn_cast<ConstantInt>(LHS))
808       if (C->getValue().isPowerOf2())
809         std::swap(LHS, RHS);
810 
811     assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
812     const auto *C = cast<ConstantInt>(RHS);
813     unsigned Val = C->getValue().logBase2();
814     if (Val < 1 || Val > 3)
815       break;
816 
817     uint64_t NumBytes = 0;
818     if (Ty && Ty->isSized()) {
819       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
820       NumBytes = NumBits / 8;
821       if (!isPowerOf2_64(NumBits))
822         NumBytes = 0;
823     }
824 
825     if (NumBytes != (1ULL << Val))
826       break;
827 
828     Addr.setShift(Val);
829     Addr.setExtendType(AArch64_AM::LSL);
830 
831     const Value *Src = LHS;
832     if (const auto *I = dyn_cast<Instruction>(Src)) {
833       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
834         // Fold the zext or sext when it won't become a noop.
835         if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
836           if (!isIntExtFree(ZE) &&
837               ZE->getOperand(0)->getType()->isIntegerTy(32)) {
838             Addr.setExtendType(AArch64_AM::UXTW);
839             Src = ZE->getOperand(0);
840           }
841         } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
842           if (!isIntExtFree(SE) &&
843               SE->getOperand(0)->getType()->isIntegerTy(32)) {
844             Addr.setExtendType(AArch64_AM::SXTW);
845             Src = SE->getOperand(0);
846           }
847         }
848       }
849     }
850 
851     Register Reg = getRegForValue(Src);
852     if (!Reg)
853       return false;
854     Addr.setOffsetReg(Reg);
855     return true;
856   }
857   case Instruction::And: {
858     if (Addr.getOffsetReg())
859       break;
860 
861     if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
862       break;
863 
864     const Value *LHS = U->getOperand(0);
865     const Value *RHS = U->getOperand(1);
866 
867     if (const auto *C = dyn_cast<ConstantInt>(LHS))
868       if (C->getValue() == 0xffffffff)
869         std::swap(LHS, RHS);
870 
871     if (const auto *C = dyn_cast<ConstantInt>(RHS))
872       if (C->getValue() == 0xffffffff) {
873         Addr.setShift(0);
874         Addr.setExtendType(AArch64_AM::LSL);
875         Addr.setExtendType(AArch64_AM::UXTW);
876 
877         Register Reg = getRegForValue(LHS);
878         if (!Reg)
879           return false;
880         Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
881         Addr.setOffsetReg(Reg);
882         return true;
883       }
884     break;
885   }
886   case Instruction::SExt:
887   case Instruction::ZExt: {
888     if (!Addr.getReg() || Addr.getOffsetReg())
889       break;
890 
891     const Value *Src = nullptr;
892     // Fold the zext or sext when it won't become a noop.
893     if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
894       if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
895         Addr.setExtendType(AArch64_AM::UXTW);
896         Src = ZE->getOperand(0);
897       }
898     } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
899       if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
900         Addr.setExtendType(AArch64_AM::SXTW);
901         Src = SE->getOperand(0);
902       }
903     }
904 
905     if (!Src)
906       break;
907 
908     Addr.setShift(0);
909     Register Reg = getRegForValue(Src);
910     if (!Reg)
911       return false;
912     Addr.setOffsetReg(Reg);
913     return true;
914   }
915   } // end switch
916 
917   if (Addr.isRegBase() && !Addr.getReg()) {
918     Register Reg = getRegForValue(Obj);
919     if (!Reg)
920       return false;
921     Addr.setReg(Reg);
922     return true;
923   }
924 
925   if (!Addr.getOffsetReg()) {
926     Register Reg = getRegForValue(Obj);
927     if (!Reg)
928       return false;
929     Addr.setOffsetReg(Reg);
930     return true;
931   }
932 
933   return false;
934 }
935 
936 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
937   const User *U = nullptr;
938   unsigned Opcode = Instruction::UserOp1;
939   bool InMBB = true;
940 
941   if (const auto *I = dyn_cast<Instruction>(V)) {
942     Opcode = I->getOpcode();
943     U = I;
944     InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
945   } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
946     Opcode = C->getOpcode();
947     U = C;
948   }
949 
950   switch (Opcode) {
951   default: break;
952   case Instruction::BitCast:
953     // Look past bitcasts if the operand is in the same BB.
954     if (InMBB)
955       return computeCallAddress(U->getOperand(0), Addr);
956     break;
957   case Instruction::IntToPtr:
958     // Look past no-op inttoptrs if the operand is in the same BB.
959     if (InMBB &&
960         TLI.getValueType(DL, U->getOperand(0)->getType()) ==
961             TLI.getPointerTy(DL))
962       return computeCallAddress(U->getOperand(0), Addr);
963     break;
964   case Instruction::PtrToInt:
965     // Look past no-op ptrtoints if the operand is in the same BB.
966     if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
967       return computeCallAddress(U->getOperand(0), Addr);
968     break;
969   }
970 
971   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
972     Addr.setGlobalValue(GV);
973     return true;
974   }
975 
976   // If all else fails, try to materialize the value in a register.
977   if (!Addr.getGlobalValue()) {
978     Addr.setReg(getRegForValue(V));
979     return Addr.getReg() != 0;
980   }
981 
982   return false;
983 }
984 
985 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
986   EVT evt = TLI.getValueType(DL, Ty, true);
987 
988   if (Subtarget->isTargetILP32() && Ty->isPointerTy())
989     return false;
990 
991   // Only handle simple types.
992   if (evt == MVT::Other || !evt.isSimple())
993     return false;
994   VT = evt.getSimpleVT();
995 
996   // This is a legal type, but it's not something we handle in fast-isel.
997   if (VT == MVT::f128)
998     return false;
999 
1000   // Handle all other legal types, i.e. a register that will directly hold this
1001   // value.
1002   return TLI.isTypeLegal(VT);
1003 }
1004 
1005 /// Determine if the value type is supported by FastISel.
1006 ///
1007 /// FastISel for AArch64 can handle more value types than are legal. This adds
1008 /// simple value types such as i1, i8, and i16.
1009 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
1010   if (Ty->isVectorTy() && !IsVectorAllowed)
1011     return false;
1012 
1013   if (isTypeLegal(Ty, VT))
1014     return true;
1015 
1016   // If this is a type that can be sign- or zero-extended to a basic operation
1017   // go ahead and accept it now.
1018   if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
1019     return true;
1020 
1021   return false;
1022 }
1023 
1024 bool AArch64FastISel::isValueAvailable(const Value *V) const {
1025   if (!isa<Instruction>(V))
1026     return true;
1027 
1028   const auto *I = cast<Instruction>(V);
1029   return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
1030 }
1031 
1032 bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
1033   if (Subtarget->isTargetILP32())
1034     return false;
1035 
1036   unsigned ScaleFactor = getImplicitScaleFactor(VT);
1037   if (!ScaleFactor)
1038     return false;
1039 
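  // AArch64 loads/stores take either an unsigned 12-bit immediate scaled by
  // the access size or a signed 9-bit unscaled immediate; offsets outside both
  // ranges have to be folded into the base register first.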
1040   bool ImmediateOffsetNeedsLowering = false;
1041   bool RegisterOffsetNeedsLowering = false;
1042   int64_t Offset = Addr.getOffset();
1043   if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
1044     ImmediateOffsetNeedsLowering = true;
1045   else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1046            !isUInt<12>(Offset / ScaleFactor))
1047     ImmediateOffsetNeedsLowering = true;
1048 
1049   // Cannot encode an offset register and an immediate offset in the same
1050   // instruction. Fold the immediate offset into the load/store instruction and
1051   // emit an additional add to take care of the offset register.
1052   if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1053     RegisterOffsetNeedsLowering = true;
1054 
1055   // Cannot encode zero register as base.
1056   if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1057     RegisterOffsetNeedsLowering = true;
1058 
1059   // If this is a stack pointer and the offset needs to be simplified then put
1060   // the alloca address into a register, set the base type back to register and
1061   // continue. This should almost never happen.
1062   if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1063   {
1064     Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1065     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
1066             ResultReg)
1067       .addFrameIndex(Addr.getFI())
1068       .addImm(0)
1069       .addImm(0);
1070     Addr.setKind(Address::RegBase);
1071     Addr.setReg(ResultReg);
1072   }
1073 
1074   if (RegisterOffsetNeedsLowering) {
1075     unsigned ResultReg = 0;
1076     if (Addr.getReg()) {
1077       if (Addr.getExtendType() == AArch64_AM::SXTW ||
1078           Addr.getExtendType() == AArch64_AM::UXTW   )
1079         ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1080                                   Addr.getOffsetReg(), Addr.getExtendType(),
1081                                   Addr.getShift());
1082       else
1083         ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1084                                   Addr.getOffsetReg(), AArch64_AM::LSL,
1085                                   Addr.getShift());
1086     } else {
1087       if (Addr.getExtendType() == AArch64_AM::UXTW)
1088         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1089                                Addr.getShift(), /*IsZExt=*/true);
1090       else if (Addr.getExtendType() == AArch64_AM::SXTW)
1091         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1092                                Addr.getShift(), /*IsZExt=*/false);
1093       else
1094         ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1095                                Addr.getShift());
1096     }
1097     if (!ResultReg)
1098       return false;
1099 
1100     Addr.setReg(ResultReg);
1101     Addr.setOffsetReg(0);
1102     Addr.setShift(0);
1103     Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1104   }
1105 
1106   // Since the offset is too large for the load/store instruction, get the
1107   // reg+offset into a register.
1108   if (ImmediateOffsetNeedsLowering) {
1109     unsigned ResultReg;
1110     if (Addr.getReg())
1111       // Try to fold the immediate into the add instruction.
1112       ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
1113     else
1114       ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1115 
1116     if (!ResultReg)
1117       return false;
1118     Addr.setReg(ResultReg);
1119     Addr.setOffset(0);
1120   }
1121   return true;
1122 }
1123 
1124 void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1125                                            const MachineInstrBuilder &MIB,
1126                                            MachineMemOperand::Flags Flags,
1127                                            unsigned ScaleFactor,
1128                                            MachineMemOperand *MMO) {
1129   int64_t Offset = Addr.getOffset() / ScaleFactor;
1130   // Frame base works a bit differently. Handle it separately.
1131   if (Addr.isFIBase()) {
1132     int FI = Addr.getFI();
1133     // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
1134     // and alignment should be based on the VT.
1135     MMO = FuncInfo.MF->getMachineMemOperand(
1136         MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1137         MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
1138     // Now add the rest of the operands.
1139     MIB.addFrameIndex(FI).addImm(Offset);
1140   } else {
1141     assert(Addr.isRegBase() && "Unexpected address kind.");
1142     const MCInstrDesc &II = MIB->getDesc();
1143     unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1144     Addr.setReg(
1145       constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1146     Addr.setOffsetReg(
1147       constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1148     if (Addr.getOffsetReg()) {
1149       assert(Addr.getOffset() == 0 && "Unexpected offset");
1150       bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1151                       Addr.getExtendType() == AArch64_AM::SXTX;
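      // The register-offset forms take two extra immediates: whether the
      // offset register is sign-extended and whether it is shifted by the
      // access size.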
1152       MIB.addReg(Addr.getReg());
1153       MIB.addReg(Addr.getOffsetReg());
1154       MIB.addImm(IsSigned);
1155       MIB.addImm(Addr.getShift() != 0);
1156     } else
1157       MIB.addReg(Addr.getReg()).addImm(Offset);
1158   }
1159 
1160   if (MMO)
1161     MIB.addMemOperand(MMO);
1162 }
1163 
1164 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1165                                      const Value *RHS, bool SetFlags,
1166                                      bool WantResult,  bool IsZExt) {
1167   AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1168   bool NeedExtend = false;
1169   switch (RetVT.SimpleTy) {
1170   default:
1171     return 0;
1172   case MVT::i1:
1173     NeedExtend = true;
1174     break;
1175   case MVT::i8:
1176     NeedExtend = true;
1177     ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1178     break;
1179   case MVT::i16:
1180     NeedExtend = true;
1181     ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1182     break;
1183   case MVT::i32:  // fall-through
1184   case MVT::i64:
1185     break;
1186   }
1187   MVT SrcVT = RetVT;
1188   RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1189 
1190   // Canonicalize immediates to the RHS first.
1191   if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1192     std::swap(LHS, RHS);
1193 
1194   // Canonicalize mul by power of 2 to the RHS.
1195   if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1196     if (isMulPowOf2(LHS))
1197       std::swap(LHS, RHS);
1198 
1199   // Canonicalize shift immediate to the RHS.
1200   if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1201     if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1202       if (isa<ConstantInt>(SI->getOperand(1)))
1203         if (SI->getOpcode() == Instruction::Shl  ||
1204             SI->getOpcode() == Instruction::LShr ||
1205             SI->getOpcode() == Instruction::AShr   )
1206           std::swap(LHS, RHS);
1207 
1208   Register LHSReg = getRegForValue(LHS);
1209   if (!LHSReg)
1210     return 0;
1211 
1212   if (NeedExtend)
1213     LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1214 
1215   unsigned ResultReg = 0;
1216   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1217     uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1218     if (C->isNegative())
1219       ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
1220                                 WantResult);
1221     else
1222       ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
1223                                 WantResult);
1224   } else if (const auto *C = dyn_cast<Constant>(RHS))
1225     if (C->isNullValue())
1226       ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);
1227 
1228   if (ResultReg)
1229     return ResultReg;
1230 
1231   // Only extend the RHS within the instruction if there is a valid extend type.
1232   if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1233       isValueAvailable(RHS)) {
1234     Register RHSReg = getRegForValue(RHS);
1235     if (!RHSReg)
1236       return 0;
1237     return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
1238                          SetFlags, WantResult);
1239   }
1240 
1241   // Check if the mul can be folded into the instruction.
1242   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1243     if (isMulPowOf2(RHS)) {
1244       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1245       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1246 
1247       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1248         if (C->getValue().isPowerOf2())
1249           std::swap(MulLHS, MulRHS);
1250 
1251       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1252       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1253       Register RHSReg = getRegForValue(MulLHS);
1254       if (!RHSReg)
1255         return 0;
1256       ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
1257                                 ShiftVal, SetFlags, WantResult);
1258       if (ResultReg)
1259         return ResultReg;
1260     }
1261   }
1262 
1263   // Check if the shift can be folded into the instruction.
1264   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1265     if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1266       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1267         AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1268         switch (SI->getOpcode()) {
1269         default: break;
1270         case Instruction::Shl:  ShiftType = AArch64_AM::LSL; break;
1271         case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1272         case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1273         }
1274         uint64_t ShiftVal = C->getZExtValue();
1275         if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1276           Register RHSReg = getRegForValue(SI->getOperand(0));
1277           if (!RHSReg)
1278             return 0;
1279           ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
1280                                     ShiftVal, SetFlags, WantResult);
1281           if (ResultReg)
1282             return ResultReg;
1283         }
1284       }
1285     }
1286   }
1287 
1288   Register RHSReg = getRegForValue(RHS);
1289   if (!RHSReg)
1290     return 0;
1291 
1292   if (NeedExtend)
1293     RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1294 
1295   return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
1296 }
1297 
1298 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1299                                         unsigned RHSReg, bool SetFlags,
1300                                         bool WantResult) {
1301   assert(LHSReg && RHSReg && "Invalid register number.");
1302 
1303   if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1304       RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1305     return 0;
1306 
1307   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1308     return 0;
1309 
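  // OpcTable is indexed as [SetFlags][UseAdd][Is64Bit], selecting between the
  // flag-setting (ADDS/SUBS) and non-flag-setting forms in both widths.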
1310   static const unsigned OpcTable[2][2][2] = {
1311     { { AArch64::SUBWrr,  AArch64::SUBXrr  },
1312       { AArch64::ADDWrr,  AArch64::ADDXrr  }  },
1313     { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1314       { AArch64::ADDSWrr, AArch64::ADDSXrr }  }
1315   };
1316   bool Is64Bit = RetVT == MVT::i64;
1317   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1318   const TargetRegisterClass *RC =
1319       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1320   unsigned ResultReg;
1321   if (WantResult)
1322     ResultReg = createResultReg(RC);
1323   else
1324     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1325 
1326   const MCInstrDesc &II = TII.get(Opc);
1327   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1328   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1329   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1330       .addReg(LHSReg)
1331       .addReg(RHSReg);
1332   return ResultReg;
1333 }
1334 
1335 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1336                                         uint64_t Imm, bool SetFlags,
1337                                         bool WantResult) {
1338   assert(LHSReg && "Invalid register number.");
1339 
1340   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1341     return 0;
1342 
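  // ADD/SUB immediates are 12 bits wide, optionally shifted left by 12; any
  // other value cannot be encoded here and must be handled by the caller.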
1343   unsigned ShiftImm;
1344   if (isUInt<12>(Imm))
1345     ShiftImm = 0;
1346   else if ((Imm & 0xfff000) == Imm) {
1347     ShiftImm = 12;
1348     Imm >>= 12;
1349   } else
1350     return 0;
1351 
1352   static const unsigned OpcTable[2][2][2] = {
1353     { { AArch64::SUBWri,  AArch64::SUBXri  },
1354       { AArch64::ADDWri,  AArch64::ADDXri  }  },
1355     { { AArch64::SUBSWri, AArch64::SUBSXri },
1356       { AArch64::ADDSWri, AArch64::ADDSXri }  }
1357   };
1358   bool Is64Bit = RetVT == MVT::i64;
1359   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1360   const TargetRegisterClass *RC;
1361   if (SetFlags)
1362     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1363   else
1364     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1365   unsigned ResultReg;
1366   if (WantResult)
1367     ResultReg = createResultReg(RC);
1368   else
1369     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1370 
1371   const MCInstrDesc &II = TII.get(Opc);
1372   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1373   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1374       .addReg(LHSReg)
1375       .addImm(Imm)
1376       .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1377   return ResultReg;
1378 }
1379 
1380 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1381                                         unsigned RHSReg,
1382                                         AArch64_AM::ShiftExtendType ShiftType,
1383                                         uint64_t ShiftImm, bool SetFlags,
1384                                         bool WantResult) {
1385   assert(LHSReg && RHSReg && "Invalid register number.");
1386   assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1387          RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1388 
1389   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1390     return 0;
1391 
1392   // Don't deal with undefined shifts.
1393   if (ShiftImm >= RetVT.getSizeInBits())
1394     return 0;
1395 
1396   static const unsigned OpcTable[2][2][2] = {
1397     { { AArch64::SUBWrs,  AArch64::SUBXrs  },
1398       { AArch64::ADDWrs,  AArch64::ADDXrs  }  },
1399     { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1400       { AArch64::ADDSWrs, AArch64::ADDSXrs }  }
1401   };
1402   bool Is64Bit = RetVT == MVT::i64;
1403   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1404   const TargetRegisterClass *RC =
1405       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1406   unsigned ResultReg;
1407   if (WantResult)
1408     ResultReg = createResultReg(RC);
1409   else
1410     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1411 
1412   const MCInstrDesc &II = TII.get(Opc);
1413   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1414   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1415   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1416       .addReg(LHSReg)
1417       .addReg(RHSReg)
1418       .addImm(getShifterImm(ShiftType, ShiftImm));
1419   return ResultReg;
1420 }
1421 
1422 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1423                                         unsigned RHSReg,
1424                                         AArch64_AM::ShiftExtendType ExtType,
1425                                         uint64_t ShiftImm, bool SetFlags,
1426                                         bool WantResult) {
1427   assert(LHSReg && RHSReg && "Invalid register number.");
1428   assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1429          RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1430 
1431   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1432     return 0;
1433 
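  // The extended-register forms only permit a small left shift of the extended
  // operand, so larger shift amounts are rejected rather than folded.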
1434   if (ShiftImm >= 4)
1435     return 0;
1436 
1437   static const unsigned OpcTable[2][2][2] = {
1438     { { AArch64::SUBWrx,  AArch64::SUBXrx  },
1439       { AArch64::ADDWrx,  AArch64::ADDXrx  }  },
1440     { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1441       { AArch64::ADDSWrx, AArch64::ADDSXrx }  }
1442   };
1443   bool Is64Bit = RetVT == MVT::i64;
1444   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1445   const TargetRegisterClass *RC = nullptr;
1446   if (SetFlags)
1447     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1448   else
1449     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1450   unsigned ResultReg;
1451   if (WantResult)
1452     ResultReg = createResultReg(RC);
1453   else
1454     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1455 
1456   const MCInstrDesc &II = TII.get(Opc);
1457   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1458   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1459   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1460       .addReg(LHSReg)
1461       .addReg(RHSReg)
1462       .addImm(getArithExtendImm(ExtType, ShiftImm));
1463   return ResultReg;
1464 }
1465 
1466 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1467   Type *Ty = LHS->getType();
1468   EVT EVT = TLI.getValueType(DL, Ty, true);
1469   if (!EVT.isSimple())
1470     return false;
1471   MVT VT = EVT.getSimpleVT();
1472 
1473   switch (VT.SimpleTy) {
1474   default:
1475     return false;
1476   case MVT::i1:
1477   case MVT::i8:
1478   case MVT::i16:
1479   case MVT::i32:
1480   case MVT::i64:
1481     return emitICmp(VT, LHS, RHS, IsZExt);
1482   case MVT::f32:
1483   case MVT::f64:
1484     return emitFCmp(VT, LHS, RHS);
1485   }
1486 }
1487 
1488 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1489                                bool IsZExt) {
1490   return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1491                  IsZExt) != 0;
1492 }
1493 
1494 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) {
1495   return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
1496                        /*SetFlags=*/true, /*WantResult=*/false) != 0;
1497 }
1498 
1499 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1500   if (RetVT != MVT::f32 && RetVT != MVT::f64)
1501     return false;
1502 
1503   // Check to see if the 2nd operand is a constant that we can encode directly
1504   // in the compare.
1505   bool UseImm = false;
1506   if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1507     if (CFP->isZero() && !CFP->isNegative())
1508       UseImm = true;
1509 
1510   Register LHSReg = getRegForValue(LHS);
1511   if (!LHSReg)
1512     return false;
1513 
1514   if (UseImm) {
1515     unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1516     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1517         .addReg(LHSReg);
1518     return true;
1519   }
1520 
1521   Register RHSReg = getRegForValue(RHS);
1522   if (!RHSReg)
1523     return false;
1524 
1525   unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1526   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1527       .addReg(LHSReg)
1528       .addReg(RHSReg);
1529   return true;
1530 }
1531 
1532 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1533                                   bool SetFlags, bool WantResult, bool IsZExt) {
1534   return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1535                     IsZExt);
1536 }
1537 
1538 /// This method is a wrapper to simplify add emission.
1539 ///
1540 /// First try to emit an add with an immediate operand using emitAddSub_ri. If
1541 /// that fails, then try to materialize the immediate into a register and use
1542 /// emitAddSub_rr instead.
1543 unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) {
1544   unsigned ResultReg;
1545   if (Imm < 0)
1546     ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);
1547   else
1548     ResultReg = emitAddSub_ri(true, VT, Op0, Imm);
1549 
1550   if (ResultReg)
1551     return ResultReg;
1552 
1553   unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1554   if (!CReg)
1555     return 0;
1556 
1557   ResultReg = emitAddSub_rr(true, VT, Op0, CReg);
1558   return ResultReg;
1559 }
1560 
1561 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1562                                   bool SetFlags, bool WantResult, bool IsZExt) {
1563   return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1564                     IsZExt);
1565 }
1566 
1567 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1568                                       unsigned RHSReg, bool WantResult) {
1569   return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,
1570                        /*SetFlags=*/true, WantResult);
1571 }
1572 
1573 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1574                                       unsigned RHSReg,
1575                                       AArch64_AM::ShiftExtendType ShiftType,
1576                                       uint64_t ShiftImm, bool WantResult) {
1577   return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,
1578                        ShiftImm, /*SetFlags=*/true, WantResult);
1579 }
1580 
1581 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1582                                         const Value *LHS, const Value *RHS) {
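  // Constants, multiplies by a power of two, and constant shifts are moved to
  // the RHS below so they can be folded into the immediate or shifted-register
  // form of the logical instruction.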
1583   // Canonicalize immediates to the RHS first.
1584   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1585     std::swap(LHS, RHS);
1586 
1587   // Canonicalize mul by power-of-2 to the RHS.
1588   if (LHS->hasOneUse() && isValueAvailable(LHS))
1589     if (isMulPowOf2(LHS))
1590       std::swap(LHS, RHS);
1591 
1592   // Canonicalize shift immediate to the RHS.
1593   if (LHS->hasOneUse() && isValueAvailable(LHS))
1594     if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1595       if (isa<ConstantInt>(SI->getOperand(1)))
1596         std::swap(LHS, RHS);
1597 
1598   Register LHSReg = getRegForValue(LHS);
1599   if (!LHSReg)
1600     return 0;
1601 
1602   unsigned ResultReg = 0;
1603   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1604     uint64_t Imm = C->getZExtValue();
1605     ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
1606   }
1607   if (ResultReg)
1608     return ResultReg;
1609 
1610   // Check if the mul can be folded into the instruction.
1611   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1612     if (isMulPowOf2(RHS)) {
1613       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1614       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1615 
1616       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1617         if (C->getValue().isPowerOf2())
1618           std::swap(MulLHS, MulRHS);
1619 
1620       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1621       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1622 
1623       Register RHSReg = getRegForValue(MulLHS);
1624       if (!RHSReg)
1625         return 0;
1626       ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1627       if (ResultReg)
1628         return ResultReg;
1629     }
1630   }
1631 
1632   // Check if the shift can be folded into the instruction.
1633   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1634     if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1635       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1636         uint64_t ShiftVal = C->getZExtValue();
1637         Register RHSReg = getRegForValue(SI->getOperand(0));
1638         if (!RHSReg)
1639           return 0;
1640         ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1641         if (ResultReg)
1642           return ResultReg;
1643       }
1644   }
1645 
1646   Register RHSReg = getRegForValue(RHS);
1647   if (!RHSReg)
1648     return 0;
1649 
1650   MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1651   ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);
1652   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1653     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1654     ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1655   }
1656   return ResultReg;
1657 }
1658 
1659 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1660                                            unsigned LHSReg, uint64_t Imm) {
1661   static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1662                 "ISD nodes are not consecutive!");
1663   static const unsigned OpcTable[3][2] = {
1664     { AArch64::ANDWri, AArch64::ANDXri },
1665     { AArch64::ORRWri, AArch64::ORRXri },
1666     { AArch64::EORWri, AArch64::EORXri }
1667   };
1668   const TargetRegisterClass *RC;
1669   unsigned Opc;
1670   unsigned RegSize;
1671   switch (RetVT.SimpleTy) {
1672   default:
1673     return 0;
1674   case MVT::i1:
1675   case MVT::i8:
1676   case MVT::i16:
1677   case MVT::i32: {
1678     unsigned Idx = ISDOpc - ISD::AND;
1679     Opc = OpcTable[Idx][0];
1680     RC = &AArch64::GPR32spRegClass;
1681     RegSize = 32;
1682     break;
1683   }
1684   case MVT::i64:
1685     Opc = OpcTable[ISDOpc - ISD::AND][1];
1686     RC = &AArch64::GPR64spRegClass;
1687     RegSize = 64;
1688     break;
1689   }
1690 
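  // AND/ORR/EOR (immediate) only accept bitmask immediates (rotated,
  // replicated runs of ones), so any other constant has to go through a
  // register instead.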
1691   if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1692     return 0;
1693 
1694   Register ResultReg =
1695       fastEmitInst_ri(Opc, RC, LHSReg,
1696                       AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1697   if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1698     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1699     ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1700   }
1701   return ResultReg;
1702 }
1703 
1704 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1705                                            unsigned LHSReg, unsigned RHSReg,
1706                                            uint64_t ShiftImm) {
1707   static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1708                 "ISD nodes are not consecutive!");
1709   static const unsigned OpcTable[3][2] = {
1710     { AArch64::ANDWrs, AArch64::ANDXrs },
1711     { AArch64::ORRWrs, AArch64::ORRXrs },
1712     { AArch64::EORWrs, AArch64::EORXrs }
1713   };
1714 
1715   // Don't deal with undefined shifts.
1716   if (ShiftImm >= RetVT.getSizeInBits())
1717     return 0;
1718 
1719   const TargetRegisterClass *RC;
1720   unsigned Opc;
1721   switch (RetVT.SimpleTy) {
1722   default:
1723     return 0;
1724   case MVT::i1:
1725   case MVT::i8:
1726   case MVT::i16:
1727   case MVT::i32:
1728     Opc = OpcTable[ISDOpc - ISD::AND][0];
1729     RC = &AArch64::GPR32RegClass;
1730     break;
1731   case MVT::i64:
1732     Opc = OpcTable[ISDOpc - ISD::AND][1];
1733     RC = &AArch64::GPR64RegClass;
1734     break;
1735   }
1736   Register ResultReg =
1737       fastEmitInst_rri(Opc, RC, LHSReg, RHSReg,
1738                        AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1739   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1740     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1741     ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1742   }
1743   return ResultReg;
1744 }
1745 
1746 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg,
1747                                      uint64_t Imm) {
1748   return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm);
1749 }
1750 
1751 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1752                                    bool WantZExt, MachineMemOperand *MMO) {
1753   if (!TLI.allowsMisalignedMemoryAccesses(VT))
1754     return 0;
1755 
1756   // Simplify this down to something we can handle.
1757   if (!simplifyAddress(Addr, VT))
1758     return 0;
1759 
1760   unsigned ScaleFactor = getImplicitScaleFactor(VT);
1761   if (!ScaleFactor)
1762     llvm_unreachable("Unexpected value type.");
1763 
1764   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1765   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1766   bool UseScaled = true;
1767   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1768     UseScaled = false;
1769     ScaleFactor = 1;
1770   }
1771 
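  // The rows of GPOpcTable pair a 32-bit and a 64-bit destination for each
  // addressing form (unscaled, scaled, register-offset X, register-offset W),
  // and the columns select the access size; the lookup below is
  // [WantZExt][2 * Idx + IsRet64Bit][size].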
1772   static const unsigned GPOpcTable[2][8][4] = {
1773     // Sign-extend.
1774     { { AArch64::LDURSBWi,  AArch64::LDURSHWi,  AArch64::LDURWi,
1775         AArch64::LDURXi  },
1776       { AArch64::LDURSBXi,  AArch64::LDURSHXi,  AArch64::LDURSWi,
1777         AArch64::LDURXi  },
1778       { AArch64::LDRSBWui,  AArch64::LDRSHWui,  AArch64::LDRWui,
1779         AArch64::LDRXui  },
1780       { AArch64::LDRSBXui,  AArch64::LDRSHXui,  AArch64::LDRSWui,
1781         AArch64::LDRXui  },
1782       { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1783         AArch64::LDRXroX },
1784       { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1785         AArch64::LDRXroX },
1786       { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1787         AArch64::LDRXroW },
1788       { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1789         AArch64::LDRXroW }
1790     },
1791     // Zero-extend.
1792     { { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1793         AArch64::LDURXi  },
1794       { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1795         AArch64::LDURXi  },
1796       { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1797         AArch64::LDRXui  },
1798       { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1799         AArch64::LDRXui  },
1800       { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1801         AArch64::LDRXroX },
1802       { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1803         AArch64::LDRXroX },
1804       { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1805         AArch64::LDRXroW },
1806       { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1807         AArch64::LDRXroW }
1808     }
1809   };
1810 
1811   static const unsigned FPOpcTable[4][2] = {
1812     { AArch64::LDURSi,  AArch64::LDURDi  },
1813     { AArch64::LDRSui,  AArch64::LDRDui  },
1814     { AArch64::LDRSroX, AArch64::LDRDroX },
1815     { AArch64::LDRSroW, AArch64::LDRDroW }
1816   };
1817 
1818   unsigned Opc;
1819   const TargetRegisterClass *RC;
1820   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1821                       Addr.getOffsetReg();
1822   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1823   if (Addr.getExtendType() == AArch64_AM::UXTW ||
1824       Addr.getExtendType() == AArch64_AM::SXTW)
1825     Idx++;
1826 
1827   bool IsRet64Bit = RetVT == MVT::i64;
1828   switch (VT.SimpleTy) {
1829   default:
1830     llvm_unreachable("Unexpected value type.");
1831   case MVT::i1: // Intentional fall-through.
1832   case MVT::i8:
1833     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1834     RC = (IsRet64Bit && !WantZExt) ?
1835              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1836     break;
1837   case MVT::i16:
1838     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1839     RC = (IsRet64Bit && !WantZExt) ?
1840              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1841     break;
1842   case MVT::i32:
1843     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1844     RC = (IsRet64Bit && !WantZExt) ?
1845              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1846     break;
1847   case MVT::i64:
1848     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1849     RC = &AArch64::GPR64RegClass;
1850     break;
1851   case MVT::f32:
1852     Opc = FPOpcTable[Idx][0];
1853     RC = &AArch64::FPR32RegClass;
1854     break;
1855   case MVT::f64:
1856     Opc = FPOpcTable[Idx][1];
1857     RC = &AArch64::FPR64RegClass;
1858     break;
1859   }
1860 
1861   // Create the base instruction, then add the operands.
1862   Register ResultReg = createResultReg(RC);
1863   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1864                                     TII.get(Opc), ResultReg);
1865   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1866 
1867   // Loading an i1 requires special handling.
1868   if (VT == MVT::i1) {
1869     unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1);
1870     assert(ANDReg && "Unexpected AND instruction emission failure.");
1871     ResultReg = ANDReg;
1872   }
1873 
1874   // For zero-extending loads to 64 bits we emit a 32-bit load and then convert
1875   // the 32-bit reg to a 64-bit reg.
1876   if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1877     Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
1878     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1879             TII.get(AArch64::SUBREG_TO_REG), Reg64)
1880         .addImm(0)
1881         .addReg(ResultReg, getKillRegState(true))
1882         .addImm(AArch64::sub_32);
1883     ResultReg = Reg64;
1884   }
1885   return ResultReg;
1886 }
1887 
1888 bool AArch64FastISel::selectAddSub(const Instruction *I) {
1889   MVT VT;
1890   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1891     return false;
1892 
1893   if (VT.isVector())
1894     return selectOperator(I, I->getOpcode());
1895 
1896   unsigned ResultReg;
1897   switch (I->getOpcode()) {
1898   default:
1899     llvm_unreachable("Unexpected instruction.");
1900   case Instruction::Add:
1901     ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1902     break;
1903   case Instruction::Sub:
1904     ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1905     break;
1906   }
1907   if (!ResultReg)
1908     return false;
1909 
1910   updateValueMap(I, ResultReg);
1911   return true;
1912 }
1913 
1914 bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1915   MVT VT;
1916   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1917     return false;
1918 
1919   if (VT.isVector())
1920     return selectOperator(I, I->getOpcode());
1921 
1922   unsigned ResultReg;
1923   switch (I->getOpcode()) {
1924   default:
1925     llvm_unreachable("Unexpected instruction.");
1926   case Instruction::And:
1927     ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1928     break;
1929   case Instruction::Or:
1930     ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1931     break;
1932   case Instruction::Xor:
1933     ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1934     break;
1935   }
1936   if (!ResultReg)
1937     return false;
1938 
1939   updateValueMap(I, ResultReg);
1940   return true;
1941 }
1942 
1943 bool AArch64FastISel::selectLoad(const Instruction *I) {
1944   MVT VT;
1945   // Verify we have a legal type before going any further.  Currently, we handle
1946   // simple types that will directly fit in a register (i32/f32/i64/f64) or
1947   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1948   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1949       cast<LoadInst>(I)->isAtomic())
1950     return false;
1951 
1952   const Value *SV = I->getOperand(0);
1953   if (TLI.supportSwiftError()) {
1954     // Swifterror values can come from either a function parameter with
1955     // swifterror attribute or an alloca with swifterror attribute.
1956     if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1957       if (Arg->hasSwiftErrorAttr())
1958         return false;
1959     }
1960 
1961     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1962       if (Alloca->isSwiftError())
1963         return false;
1964     }
1965   }
1966 
1967   // See if we can handle this address.
1968   Address Addr;
1969   if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1970     return false;
1971 
1972   // Fold the following sign-/zero-extend into the load instruction.
1973   bool WantZExt = true;
1974   MVT RetVT = VT;
1975   const Value *IntExtVal = nullptr;
1976   if (I->hasOneUse()) {
1977     if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1978       if (isTypeSupported(ZE->getType(), RetVT))
1979         IntExtVal = ZE;
1980       else
1981         RetVT = VT;
1982     } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1983       if (isTypeSupported(SE->getType(), RetVT))
1984         IntExtVal = SE;
1985       else
1986         RetVT = VT;
1987       WantZExt = false;
1988     }
1989   }
1990 
1991   unsigned ResultReg =
1992       emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1993   if (!ResultReg)
1994     return false;
1995 
1996   // There are a few different cases we have to handle, because the load or the
1997   // sign-/zero-extend might not be selected by FastISel if we fall back to
1998   // SelectionDAG. There is also an ordering issue when both instructions are in
1999   // different basic blocks.
2000   // 1.) The load instruction is selected by FastISel, but the integer extend is
2001   //     not. This usually happens when the integer extend is in a different
2002   //     basic block and SelectionDAG took over for that basic block.
2003   // 2.) The load instruction is selected before the integer extend. This only
2004   //     happens when the integer extend is in a different basic block.
2005   // 3.) The load instruction is selected by SelectionDAG and the integer extend
2006   //     by FastISel. This happens if there are instructions between the load
2007   //     and the integer extend that couldn't be selected by FastISel.
2008   if (IntExtVal) {
2009     // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2010     // could select it. Emit a copy to subreg if necessary. FastISel will remove
2011     // it when it selects the integer extend.
2012     Register Reg = lookUpRegForValue(IntExtVal);
2013     auto *MI = MRI.getUniqueVRegDef(Reg);
2014     if (!MI) {
2015       if (RetVT == MVT::i64 && VT <= MVT::i32) {
2016         if (WantZExt) {
2017           // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2018           MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
2019           ResultReg = std::prev(I)->getOperand(0).getReg();
2020           removeDeadCode(I, std::next(I));
2021         } else
2022           ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2023                                                  AArch64::sub_32);
2024       }
2025       updateValueMap(I, ResultReg);
2026       return true;
2027     }
2028 
2029     // The integer extend has already been emitted - delete all the instructions
2030     // that have been emitted by the integer extend lowering code and use the
2031     // result from the load instruction directly.
2032     while (MI) {
2033       Reg = 0;
2034       for (auto &Opnd : MI->uses()) {
2035         if (Opnd.isReg()) {
2036           Reg = Opnd.getReg();
2037           break;
2038         }
2039       }
2040       MachineBasicBlock::iterator I(MI);
2041       removeDeadCode(I, std::next(I));
2042       MI = nullptr;
2043       if (Reg)
2044         MI = MRI.getUniqueVRegDef(Reg);
2045     }
2046     updateValueMap(IntExtVal, ResultReg);
2047     return true;
2048   }
2049 
2050   updateValueMap(I, ResultReg);
2051   return true;
2052 }
2053 
2054 bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2055                                        unsigned AddrReg,
2056                                        MachineMemOperand *MMO) {
2057   unsigned Opc;
2058   switch (VT.SimpleTy) {
2059   default: return false;
2060   case MVT::i8:  Opc = AArch64::STLRB; break;
2061   case MVT::i16: Opc = AArch64::STLRH; break;
2062   case MVT::i32: Opc = AArch64::STLRW; break;
2063   case MVT::i64: Opc = AArch64::STLRX; break;
2064   }
2065 
2066   const MCInstrDesc &II = TII.get(Opc);
2067   SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2068   AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2069   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2070       .addReg(SrcReg)
2071       .addReg(AddrReg)
2072       .addMemOperand(MMO);
2073   return true;
2074 }
2075 
2076 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2077                                 MachineMemOperand *MMO) {
2078   if (!TLI.allowsMisalignedMemoryAccesses(VT))
2079     return false;
2080 
2081   // Simplify this down to something we can handle.
2082   if (!simplifyAddress(Addr, VT))
2083     return false;
2084 
2085   unsigned ScaleFactor = getImplicitScaleFactor(VT);
2086   if (!ScaleFactor)
2087     llvm_unreachable("Unexpected value type.");
2088 
2089   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2090   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2091   bool UseScaled = true;
2092   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2093     UseScaled = false;
2094     ScaleFactor = 1;
2095   }
2096 
2097   static const unsigned OpcTable[4][6] = {
2098     { AArch64::STURBBi,  AArch64::STURHHi,  AArch64::STURWi,  AArch64::STURXi,
2099       AArch64::STURSi,   AArch64::STURDi },
2100     { AArch64::STRBBui,  AArch64::STRHHui,  AArch64::STRWui,  AArch64::STRXui,
2101       AArch64::STRSui,   AArch64::STRDui },
2102     { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2103       AArch64::STRSroX,  AArch64::STRDroX },
2104     { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2105       AArch64::STRSroW,  AArch64::STRDroW }
2106   };
2107 
2108   unsigned Opc;
2109   bool VTIsi1 = false;
2110   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2111                       Addr.getOffsetReg();
2112   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2113   if (Addr.getExtendType() == AArch64_AM::UXTW ||
2114       Addr.getExtendType() == AArch64_AM::SXTW)
2115     Idx++;
2116 
2117   switch (VT.SimpleTy) {
2118   default: llvm_unreachable("Unexpected value type.");
2119   case MVT::i1:  VTIsi1 = true; [[fallthrough]];
2120   case MVT::i8:  Opc = OpcTable[Idx][0]; break;
2121   case MVT::i16: Opc = OpcTable[Idx][1]; break;
2122   case MVT::i32: Opc = OpcTable[Idx][2]; break;
2123   case MVT::i64: Opc = OpcTable[Idx][3]; break;
2124   case MVT::f32: Opc = OpcTable[Idx][4]; break;
2125   case MVT::f64: Opc = OpcTable[Idx][5]; break;
2126   }
2127 
2128   // Storing an i1 requires special handling.
2129   if (VTIsi1 && SrcReg != AArch64::WZR) {
2130     unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1);
2131     assert(ANDReg && "Unexpected AND instruction emission failure.");
2132     SrcReg = ANDReg;
2133   }
2134   // Create the base instruction, then add the operands.
2135   const MCInstrDesc &II = TII.get(Opc);
2136   SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2137   MachineInstrBuilder MIB =
2138       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(SrcReg);
2139   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2140 
2141   return true;
2142 }
2143 
2144 bool AArch64FastISel::selectStore(const Instruction *I) {
2145   MVT VT;
2146   const Value *Op0 = I->getOperand(0);
2147   // Verify we have a legal type before going any further.  Currently, we handle
2148   // simple types that will directly fit in a register (i32/f32/i64/f64) or
2149   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2150   if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2151     return false;
2152 
2153   const Value *PtrV = I->getOperand(1);
2154   if (TLI.supportSwiftError()) {
2155     // Swifterror values can come from either a function parameter with
2156     // swifterror attribute or an alloca with swifterror attribute.
2157     if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2158       if (Arg->hasSwiftErrorAttr())
2159         return false;
2160     }
2161 
2162     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2163       if (Alloca->isSwiftError())
2164         return false;
2165     }
2166   }
2167 
2168   // Get the value to be stored into a register. Use the zero register directly
2169   // when possible to avoid an unnecessary copy and a wasted register.
2170   unsigned SrcReg = 0;
2171   if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2172     if (CI->isZero())
2173       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2174   } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2175     if (CF->isZero() && !CF->isNegative()) {
2176       VT = MVT::getIntegerVT(VT.getSizeInBits());
2177       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2178     }
2179   }
2180 
2181   if (!SrcReg)
2182     SrcReg = getRegForValue(Op0);
2183 
2184   if (!SrcReg)
2185     return false;
2186 
2187   auto *SI = cast<StoreInst>(I);
2188 
2189   // Try to emit a STLR for seq_cst/release.
2190   if (SI->isAtomic()) {
2191     AtomicOrdering Ord = SI->getOrdering();
2192     // The non-atomic instructions are sufficient for relaxed stores.
2193     if (isReleaseOrStronger(Ord)) {
2194       // The STLR addressing mode only supports a base reg; pass that directly.
2195       Register AddrReg = getRegForValue(PtrV);
2196       return emitStoreRelease(VT, SrcReg, AddrReg,
2197                               createMachineMemOperandFor(I));
2198     }
2199   }
2200 
2201   // See if we can handle this address.
2202   Address Addr;
2203   if (!computeAddress(PtrV, Addr, Op0->getType()))
2204     return false;
2205 
2206   if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2207     return false;
2208   return true;
2209 }
2210 
2211 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2212   switch (Pred) {
2213   case CmpInst::FCMP_ONE:
2214   case CmpInst::FCMP_UEQ:
2215   default:
2216     // AL is our "false" for now. The other two need more compares.
2217     return AArch64CC::AL;
2218   case CmpInst::ICMP_EQ:
2219   case CmpInst::FCMP_OEQ:
2220     return AArch64CC::EQ;
2221   case CmpInst::ICMP_SGT:
2222   case CmpInst::FCMP_OGT:
2223     return AArch64CC::GT;
2224   case CmpInst::ICMP_SGE:
2225   case CmpInst::FCMP_OGE:
2226     return AArch64CC::GE;
2227   case CmpInst::ICMP_UGT:
2228   case CmpInst::FCMP_UGT:
2229     return AArch64CC::HI;
2230   case CmpInst::FCMP_OLT:
2231     return AArch64CC::MI;
2232   case CmpInst::ICMP_ULE:
2233   case CmpInst::FCMP_OLE:
2234     return AArch64CC::LS;
2235   case CmpInst::FCMP_ORD:
2236     return AArch64CC::VC;
2237   case CmpInst::FCMP_UNO:
2238     return AArch64CC::VS;
2239   case CmpInst::FCMP_UGE:
2240     return AArch64CC::PL;
2241   case CmpInst::ICMP_SLT:
2242   case CmpInst::FCMP_ULT:
2243     return AArch64CC::LT;
2244   case CmpInst::ICMP_SLE:
2245   case CmpInst::FCMP_ULE:
2246     return AArch64CC::LE;
2247   case CmpInst::FCMP_UNE:
2248   case CmpInst::ICMP_NE:
2249     return AArch64CC::NE;
2250   case CmpInst::ICMP_UGE:
2251     return AArch64CC::HS;
2252   case CmpInst::ICMP_ULT:
2253     return AArch64CC::LO;
2254   }
2255 }
2256 
2257 /// Try to emit a combined compare-and-branch instruction.
2258 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2259   // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2260   // will not be produced, as they are conditional branch instructions that do
2261   // not set flags.
2262   if (FuncInfo.MF->getFunction().hasFnAttribute(
2263           Attribute::SpeculativeLoadHardening))
2264     return false;
2265 
2266   assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2267   const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2268   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2269 
2270   const Value *LHS = CI->getOperand(0);
2271   const Value *RHS = CI->getOperand(1);
2272 
2273   MVT VT;
2274   if (!isTypeSupported(LHS->getType(), VT))
2275     return false;
2276 
2277   unsigned BW = VT.getSizeInBits();
2278   if (BW > 64)
2279     return false;
2280 
2281   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2282   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2283 
2284   // Try to take advantage of fallthrough opportunities.
2285   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2286     std::swap(TBB, FBB);
2287     Predicate = CmpInst::getInversePredicate(Predicate);
2288   }
2289 
2290   int TestBit = -1;
2291   bool IsCmpNE;
2292   switch (Predicate) {
2293   default:
2294     return false;
2295   case CmpInst::ICMP_EQ:
2296   case CmpInst::ICMP_NE:
2297     if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2298       std::swap(LHS, RHS);
2299 
2300     if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2301       return false;
2302 
2303     if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2304       if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2305         const Value *AndLHS = AI->getOperand(0);
2306         const Value *AndRHS = AI->getOperand(1);
2307 
2308         if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2309           if (C->getValue().isPowerOf2())
2310             std::swap(AndLHS, AndRHS);
2311 
2312         if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2313           if (C->getValue().isPowerOf2()) {
2314             TestBit = C->getValue().logBase2();
2315             LHS = AndLHS;
2316           }
2317       }
2318 
2319     if (VT == MVT::i1)
2320       TestBit = 0;
2321 
2322     IsCmpNE = Predicate == CmpInst::ICMP_NE;
2323     break;
2324   case CmpInst::ICMP_SLT:
2325   case CmpInst::ICMP_SGE:
2326     if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2327       return false;
2328 
2329     TestBit = BW - 1;
2330     IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2331     break;
2332   case CmpInst::ICMP_SGT:
2333   case CmpInst::ICMP_SLE:
2334     if (!isa<ConstantInt>(RHS))
2335       return false;
2336 
2337     if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2338       return false;
2339 
2340     TestBit = BW - 1;
2341     IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2342     break;
2343   } // end switch
2344 
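  // Pick CB(N)Z when comparing a whole register against zero and TB(N)Z when a
  // single bit is tested; the table is indexed by [IsBitTest][IsCmpNE][Is64Bit].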
2345   static const unsigned OpcTable[2][2][2] = {
2346     { {AArch64::CBZW,  AArch64::CBZX },
2347       {AArch64::CBNZW, AArch64::CBNZX} },
2348     { {AArch64::TBZW,  AArch64::TBZX },
2349       {AArch64::TBNZW, AArch64::TBNZX} }
2350   };
2351 
2352   bool IsBitTest = TestBit != -1;
2353   bool Is64Bit = BW == 64;
2354   if (TestBit < 32 && TestBit >= 0)
2355     Is64Bit = false;
2356 
2357   unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2358   const MCInstrDesc &II = TII.get(Opc);
2359 
2360   Register SrcReg = getRegForValue(LHS);
2361   if (!SrcReg)
2362     return false;
2363 
2364   if (BW == 64 && !Is64Bit)
2365     SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32);
2366 
2367   if ((BW < 32) && !IsBitTest)
2368     SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
2369 
2370   // Emit the combined compare and branch instruction.
2371   SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2372   MachineInstrBuilder MIB =
2373       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
2374           .addReg(SrcReg);
2375   if (IsBitTest)
2376     MIB.addImm(TestBit);
2377   MIB.addMBB(TBB);
2378 
2379   finishCondBranch(BI->getParent(), TBB, FBB);
2380   return true;
2381 }
2382 
2383 bool AArch64FastISel::selectBranch(const Instruction *I) {
2384   const BranchInst *BI = cast<BranchInst>(I);
2385   if (BI->isUnconditional()) {
2386     MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2387     fastEmitBranch(MSucc, BI->getDebugLoc());
2388     return true;
2389   }
2390 
2391   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2392   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2393 
2394   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2395     if (CI->hasOneUse() && isValueAvailable(CI)) {
2396       // Try to optimize or fold the cmp.
2397       CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2398       switch (Predicate) {
2399       default:
2400         break;
2401       case CmpInst::FCMP_FALSE:
2402         fastEmitBranch(FBB, MIMD.getDL());
2403         return true;
2404       case CmpInst::FCMP_TRUE:
2405         fastEmitBranch(TBB, MIMD.getDL());
2406         return true;
2407       }
2408 
2409       // Try to emit a combined compare-and-branch first.
2410       if (emitCompareAndBranch(BI))
2411         return true;
2412 
2413       // Try to take advantage of fallthrough opportunities.
2414       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2415         std::swap(TBB, FBB);
2416         Predicate = CmpInst::getInversePredicate(Predicate);
2417       }
2418 
2419       // Emit the cmp.
2420       if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2421         return false;
2422 
2423       // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2424       // instruction.
2425       AArch64CC::CondCode CC = getCompareCC(Predicate);
2426       AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2427       switch (Predicate) {
2428       default:
2429         break;
2430       case CmpInst::FCMP_UEQ:
2431         ExtraCC = AArch64CC::EQ;
2432         CC = AArch64CC::VS;
2433         break;
2434       case CmpInst::FCMP_ONE:
2435         ExtraCC = AArch64CC::MI;
2436         CC = AArch64CC::GT;
2437         break;
2438       }
2439       assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2440 
2441       // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2442       if (ExtraCC != AArch64CC::AL) {
2443         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2444             .addImm(ExtraCC)
2445             .addMBB(TBB);
2446       }
2447 
2448       // Emit the branch.
2449       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2450           .addImm(CC)
2451           .addMBB(TBB);
2452 
2453       finishCondBranch(BI->getParent(), TBB, FBB);
2454       return true;
2455     }
2456   } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2457     uint64_t Imm = CI->getZExtValue();
2458     MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2459     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::B))
2460         .addMBB(Target);
2461 
2462     // Obtain the branch probability and add the target to the successor list.
2463     if (FuncInfo.BPI) {
2464       auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2465           BI->getParent(), Target->getBasicBlock());
2466       FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2467     } else
2468       FuncInfo.MBB->addSuccessorWithoutProb(Target);
2469     return true;
2470   } else {
2471     AArch64CC::CondCode CC = AArch64CC::NE;
2472     if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2473       // Fake request the condition, otherwise the intrinsic might be completely
2474       // optimized away.
2475       Register CondReg = getRegForValue(BI->getCondition());
2476       if (!CondReg)
2477         return false;
2478 
2479       // Emit the branch.
2480       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2481         .addImm(CC)
2482         .addMBB(TBB);
2483 
2484       finishCondBranch(BI->getParent(), TBB, FBB);
2485       return true;
2486     }
2487   }
2488 
2489   Register CondReg = getRegForValue(BI->getCondition());
2490   if (CondReg == 0)
2491     return false;
2492 
2493   // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2494   unsigned Opcode = AArch64::TBNZW;
2495   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2496     std::swap(TBB, FBB);
2497     Opcode = AArch64::TBZW;
2498   }
2499 
2500   const MCInstrDesc &II = TII.get(Opcode);
2501   Register ConstrainedCondReg
2502     = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2503   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2504       .addReg(ConstrainedCondReg)
2505       .addImm(0)
2506       .addMBB(TBB);
2507 
2508   finishCondBranch(BI->getParent(), TBB, FBB);
2509   return true;
2510 }
2511 
2512 bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2513   const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2514   Register AddrReg = getRegForValue(BI->getOperand(0));
2515   if (AddrReg == 0)
2516     return false;
2517 
2518   // Emit the indirect branch.
2519   const MCInstrDesc &II = TII.get(AArch64::BR);
2520   AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
2521   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(AddrReg);
2522 
2523   // Make sure the CFG is up-to-date.
2524   for (const auto *Succ : BI->successors())
2525     FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2526 
2527   return true;
2528 }
2529 
2530 bool AArch64FastISel::selectCmp(const Instruction *I) {
2531   const CmpInst *CI = cast<CmpInst>(I);
2532 
2533   // Vectors of i1 are weird: bail out.
2534   if (CI->getType()->isVectorTy())
2535     return false;
2536 
2537   // Try to optimize or fold the cmp.
2538   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2539   unsigned ResultReg = 0;
2540   switch (Predicate) {
2541   default:
2542     break;
2543   case CmpInst::FCMP_FALSE:
2544     ResultReg = createResultReg(&AArch64::GPR32RegClass);
2545     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2546             TII.get(TargetOpcode::COPY), ResultReg)
2547         .addReg(AArch64::WZR, getKillRegState(true));
2548     break;
2549   case CmpInst::FCMP_TRUE:
2550     ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2551     break;
2552   }
2553 
2554   if (ResultReg) {
2555     updateValueMap(I, ResultReg);
2556     return true;
2557   }
2558 
2559   // Emit the cmp.
2560   if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2561     return false;
2562 
2563   ResultReg = createResultReg(&AArch64::GPR32RegClass);
2564 
2565   // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2566   // condition codes are inverted, because they are used by CSINC.
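  // CSINC Wd, WZR, WZR, cc produces 0 when cc holds and 1 otherwise, so the
  // inverted predicate yields 1 exactly when the original comparison is true.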
2567   static unsigned CondCodeTable[2][2] = {
2568     { AArch64CC::NE, AArch64CC::VC },
2569     { AArch64CC::PL, AArch64CC::LE }
2570   };
2571   unsigned *CondCodes = nullptr;
2572   switch (Predicate) {
2573   default:
2574     break;
2575   case CmpInst::FCMP_UEQ:
2576     CondCodes = &CondCodeTable[0][0];
2577     break;
2578   case CmpInst::FCMP_ONE:
2579     CondCodes = &CondCodeTable[1][0];
2580     break;
2581   }
2582 
2583   if (CondCodes) {
2584     Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2585     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2586             TmpReg1)
2587         .addReg(AArch64::WZR, getKillRegState(true))
2588         .addReg(AArch64::WZR, getKillRegState(true))
2589         .addImm(CondCodes[0]);
2590     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2591             ResultReg)
2592         .addReg(TmpReg1, getKillRegState(true))
2593         .addReg(AArch64::WZR, getKillRegState(true))
2594         .addImm(CondCodes[1]);
2595 
2596     updateValueMap(I, ResultReg);
2597     return true;
2598   }
2599 
2600   // Now set a register based on the comparison.
2601   AArch64CC::CondCode CC = getCompareCC(Predicate);
2602   assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2603   AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2604   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2605           ResultReg)
2606       .addReg(AArch64::WZR, getKillRegState(true))
2607       .addReg(AArch64::WZR, getKillRegState(true))
2608       .addImm(invertedCC);
2609 
2610   updateValueMap(I, ResultReg);
2611   return true;
2612 }
2613 
2614 /// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2615 /// value.
2616 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2617   if (!SI->getType()->isIntegerTy(1))
2618     return false;
2619 
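  // An i1 select against a constant operand reduces to a single logical op:
  //   select c, 1, f -> orr c, f            select c, 0, f -> bic f, c
  //   select c, t, 1 -> orr (eor c, 1), t   select c, t, 0 -> and c, t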
2620   const Value *Src1Val, *Src2Val;
2621   unsigned Opc = 0;
2622   bool NeedExtraOp = false;
2623   if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2624     if (CI->isOne()) {
2625       Src1Val = SI->getCondition();
2626       Src2Val = SI->getFalseValue();
2627       Opc = AArch64::ORRWrr;
2628     } else {
2629       assert(CI->isZero());
2630       Src1Val = SI->getFalseValue();
2631       Src2Val = SI->getCondition();
2632       Opc = AArch64::BICWrr;
2633     }
2634   } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2635     if (CI->isOne()) {
2636       Src1Val = SI->getCondition();
2637       Src2Val = SI->getTrueValue();
2638       Opc = AArch64::ORRWrr;
2639       NeedExtraOp = true;
2640     } else {
2641       assert(CI->isZero());
2642       Src1Val = SI->getCondition();
2643       Src2Val = SI->getTrueValue();
2644       Opc = AArch64::ANDWrr;
2645     }
2646   }
2647 
2648   if (!Opc)
2649     return false;
2650 
2651   Register Src1Reg = getRegForValue(Src1Val);
2652   if (!Src1Reg)
2653     return false;
2654 
2655   Register Src2Reg = getRegForValue(Src2Val);
2656   if (!Src2Reg)
2657     return false;
2658 
2659   if (NeedExtraOp)
2660     Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1);
2661 
2662   Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2663                                        Src2Reg);
2664   updateValueMap(SI, ResultReg);
2665   return true;
2666 }
2667 
2668 bool AArch64FastISel::selectSelect(const Instruction *I) {
2669   assert(isa<SelectInst>(I) && "Expected a select instruction.");
2670   MVT VT;
2671   if (!isTypeSupported(I->getType(), VT))
2672     return false;
2673 
2674   unsigned Opc;
2675   const TargetRegisterClass *RC;
2676   switch (VT.SimpleTy) {
2677   default:
2678     return false;
2679   case MVT::i1:
2680   case MVT::i8:
2681   case MVT::i16:
2682   case MVT::i32:
2683     Opc = AArch64::CSELWr;
2684     RC = &AArch64::GPR32RegClass;
2685     break;
2686   case MVT::i64:
2687     Opc = AArch64::CSELXr;
2688     RC = &AArch64::GPR64RegClass;
2689     break;
2690   case MVT::f32:
2691     Opc = AArch64::FCSELSrrr;
2692     RC = &AArch64::FPR32RegClass;
2693     break;
2694   case MVT::f64:
2695     Opc = AArch64::FCSELDrrr;
2696     RC = &AArch64::FPR64RegClass;
2697     break;
2698   }
2699 
2700   const SelectInst *SI = cast<SelectInst>(I);
2701   const Value *Cond = SI->getCondition();
2702   AArch64CC::CondCode CC = AArch64CC::NE;
2703   AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2704 
2705   if (optimizeSelect(SI))
2706     return true;
2707 
2708   // Try to pick up the flags, so we don't have to emit another compare.
2709   if (foldXALUIntrinsic(CC, I, Cond)) {
2710     // Fake request the condition to force emission of the XALU intrinsic.
2711     Register CondReg = getRegForValue(Cond);
2712     if (!CondReg)
2713       return false;
2714   } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2715              isValueAvailable(Cond)) {
2716     const auto *Cmp = cast<CmpInst>(Cond);
2717     // Try to optimize or fold the cmp.
2718     CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2719     const Value *FoldSelect = nullptr;
2720     switch (Predicate) {
2721     default:
2722       break;
2723     case CmpInst::FCMP_FALSE:
2724       FoldSelect = SI->getFalseValue();
2725       break;
2726     case CmpInst::FCMP_TRUE:
2727       FoldSelect = SI->getTrueValue();
2728       break;
2729     }
2730 
2731     if (FoldSelect) {
2732       Register SrcReg = getRegForValue(FoldSelect);
2733       if (!SrcReg)
2734         return false;
2735 
2736       updateValueMap(I, SrcReg);
2737       return true;
2738     }
2739 
2740     // Emit the cmp.
2741     if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2742       return false;
2743 
2744     // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2745     CC = getCompareCC(Predicate);
2746     switch (Predicate) {
2747     default:
2748       break;
2749     case CmpInst::FCMP_UEQ:
2750       ExtraCC = AArch64CC::EQ;
2751       CC = AArch64CC::VS;
2752       break;
2753     case CmpInst::FCMP_ONE:
2754       ExtraCC = AArch64CC::MI;
2755       CC = AArch64CC::GT;
2756       break;
2757     }
2758     assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2759   } else {
2760     Register CondReg = getRegForValue(Cond);
2761     if (!CondReg)
2762       return false;
2763 
2764     const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2765     CondReg = constrainOperandRegClass(II, CondReg, 1);
2766 
2767     // Emit a TST instruction (ANDS wzr, reg, #imm).
2768     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II,
2769             AArch64::WZR)
2770         .addReg(CondReg)
2771         .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2772   }
2773 
2774   Register Src1Reg = getRegForValue(SI->getTrueValue());
2775   Register Src2Reg = getRegForValue(SI->getFalseValue());
2776 
2777   if (!Src1Reg || !Src2Reg)
2778     return false;
2779 
2780   if (ExtraCC != AArch64CC::AL)
2781     Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC);
2782 
2783   Register ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC);
2784   updateValueMap(I, ResultReg);
2785   return true;
2786 }
2787 
2788 bool AArch64FastISel::selectFPExt(const Instruction *I) {
2789   Value *V = I->getOperand(0);
2790   if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2791     return false;
2792 
2793   Register Op = getRegForValue(V);
2794   if (Op == 0)
2795     return false;
2796 
2797   Register ResultReg = createResultReg(&AArch64::FPR64RegClass);
2798   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTDSr),
2799           ResultReg).addReg(Op);
2800   updateValueMap(I, ResultReg);
2801   return true;
2802 }
2803 
2804 bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2805   Value *V = I->getOperand(0);
2806   if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2807     return false;
2808 
2809   Register Op = getRegForValue(V);
2810   if (Op == 0)
2811     return false;
2812 
2813   Register ResultReg = createResultReg(&AArch64::FPR32RegClass);
2814   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTSDr),
2815           ResultReg).addReg(Op);
2816   updateValueMap(I, ResultReg);
2817   return true;
2818 }
2819 
2820 // FPToUI and FPToSI
2821 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2822   MVT DestVT;
2823   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2824     return false;
2825 
2826   Register SrcReg = getRegForValue(I->getOperand(0));
2827   if (SrcReg == 0)
2828     return false;
2829 
2830   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2831   if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
2832     return false;
2833 
2834   unsigned Opc;
2835   if (SrcVT == MVT::f64) {
2836     if (Signed)
2837       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2838     else
2839       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2840   } else {
2841     if (Signed)
2842       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2843     else
2844       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2845   }
2846   Register ResultReg = createResultReg(
2847       DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2848   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
2849       .addReg(SrcReg);
2850   updateValueMap(I, ResultReg);
2851   return true;
2852 }
2853 
2854 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2855   MVT DestVT;
2856   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2857     return false;
2858   // Let regular ISEL handle FP16
2859   if (DestVT == MVT::f16)
2860     return false;
2861 
2862   assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2863          "Unexpected value type.");
2864 
2865   Register SrcReg = getRegForValue(I->getOperand(0));
2866   if (!SrcReg)
2867     return false;
2868 
2869   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2870 
2871   // Handle sign-extension.
2872   if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2873     SrcReg =
2874         emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2875     if (!SrcReg)
2876       return false;
2877   }
2878 
2879   unsigned Opc;
2880   if (SrcVT == MVT::i64) {
2881     if (Signed)
2882       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2883     else
2884       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2885   } else {
2886     if (Signed)
2887       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2888     else
2889       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2890   }
2891 
2892   Register ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg);
2893   updateValueMap(I, ResultReg);
2894   return true;
2895 }
2896 
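/// Try to lower the formal arguments directly into their ABI-assigned
/// registers. Only simple C/Swift signatures are handled: no varargs, no
/// aggregates, no sret/byval/swift/nest attributes, and at most eight GPR and
/// eight FPR/SIMD arguments.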
2897 bool AArch64FastISel::fastLowerArguments() {
2898   if (!FuncInfo.CanLowerReturn)
2899     return false;
2900 
2901   const Function *F = FuncInfo.Fn;
2902   if (F->isVarArg())
2903     return false;
2904 
2905   CallingConv::ID CC = F->getCallingConv();
2906   if (CC != CallingConv::C && CC != CallingConv::Swift)
2907     return false;
2908 
2909   if (Subtarget->hasCustomCallingConv())
2910     return false;
2911 
2912   // Only handle simple cases: at most 8 GPR and 8 FPR arguments each.
2913   unsigned GPRCnt = 0;
2914   unsigned FPRCnt = 0;
2915   for (auto const &Arg : F->args()) {
2916     if (Arg.hasAttribute(Attribute::ByVal) ||
2917         Arg.hasAttribute(Attribute::InReg) ||
2918         Arg.hasAttribute(Attribute::StructRet) ||
2919         Arg.hasAttribute(Attribute::SwiftSelf) ||
2920         Arg.hasAttribute(Attribute::SwiftAsync) ||
2921         Arg.hasAttribute(Attribute::SwiftError) ||
2922         Arg.hasAttribute(Attribute::Nest))
2923       return false;
2924 
2925     Type *ArgTy = Arg.getType();
2926     if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2927       return false;
2928 
2929     EVT ArgVT = TLI.getValueType(DL, ArgTy);
2930     if (!ArgVT.isSimple())
2931       return false;
2932 
2933     MVT VT = ArgVT.getSimpleVT().SimpleTy;
2934     if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2935       return false;
2936 
2937     if (VT.isVector() &&
2938         (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2939       return false;
2940 
2941     if (VT >= MVT::i1 && VT <= MVT::i64)
2942       ++GPRCnt;
2943     else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2944              VT.is128BitVector())
2945       ++FPRCnt;
2946     else
2947       return false;
2948 
2949     if (GPRCnt > 8 || FPRCnt > 8)
2950       return false;
2951   }
2952 
2953   static const MCPhysReg Registers[6][8] = {
2954     { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2955       AArch64::W5, AArch64::W6, AArch64::W7 },
2956     { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2957       AArch64::X5, AArch64::X6, AArch64::X7 },
2958     { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2959       AArch64::H5, AArch64::H6, AArch64::H7 },
2960     { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2961       AArch64::S5, AArch64::S6, AArch64::S7 },
2962     { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2963       AArch64::D5, AArch64::D6, AArch64::D7 },
2964     { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2965       AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2966   };
2967 
2968   unsigned GPRIdx = 0;
2969   unsigned FPRIdx = 0;
2970   for (auto const &Arg : F->args()) {
2971     MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2972     unsigned SrcReg;
2973     const TargetRegisterClass *RC;
2974     if (VT >= MVT::i1 && VT <= MVT::i32) {
2975       SrcReg = Registers[0][GPRIdx++];
2976       RC = &AArch64::GPR32RegClass;
2977       VT = MVT::i32;
2978     } else if (VT == MVT::i64) {
2979       SrcReg = Registers[1][GPRIdx++];
2980       RC = &AArch64::GPR64RegClass;
2981     } else if (VT == MVT::f16) {
2982       SrcReg = Registers[2][FPRIdx++];
2983       RC = &AArch64::FPR16RegClass;
2984     } else if (VT == MVT::f32) {
2985       SrcReg = Registers[3][FPRIdx++];
2986       RC = &AArch64::FPR32RegClass;
2987     } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2988       SrcReg = Registers[4][FPRIdx++];
2989       RC = &AArch64::FPR64RegClass;
2990     } else if (VT.is128BitVector()) {
2991       SrcReg = Registers[5][FPRIdx++];
2992       RC = &AArch64::FPR128RegClass;
2993     } else
2994       llvm_unreachable("Unexpected value type.");
2995 
2996     Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
2997     // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
2998     // Without this, EmitLiveInCopies may eliminate the livein if its only
2999     // use is a bitcast (which isn't turned into an instruction).
3000     Register ResultReg = createResultReg(RC);
3001     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3002             TII.get(TargetOpcode::COPY), ResultReg)
3003         .addReg(DstReg, getKillRegState(true));
3004     updateValueMap(&Arg, ResultReg);
3005   }
3006   return true;
3007 }
3008 
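/// Lower the outgoing arguments of a call: run the calling-convention
/// analysis, emit CALLSEQ_START, promote narrow arguments as required, and
/// copy each argument into its assigned register or store it to its stack
/// slot. The callee stack size is returned in NumBytes.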
3009 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3010                                       SmallVectorImpl<MVT> &OutVTs,
3011                                       unsigned &NumBytes) {
3012   CallingConv::ID CC = CLI.CallConv;
3013   SmallVector<CCValAssign, 16> ArgLocs;
3014   CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3015   CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
3016 
3017   // Get a count of how many bytes are to be pushed on the stack.
3018   NumBytes = CCInfo.getStackSize();
3019 
3020   // Issue CALLSEQ_START
3021   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3022   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackDown))
3023     .addImm(NumBytes).addImm(0);
3024 
3025   // Process the args.
3026   for (CCValAssign &VA : ArgLocs) {
3027     const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3028     MVT ArgVT = OutVTs[VA.getValNo()];
3029 
3030     Register ArgReg = getRegForValue(ArgVal);
3031     if (!ArgReg)
3032       return false;
3033 
3034     // Handle arg promotion: SExt, ZExt, AExt.
3035     switch (VA.getLocInfo()) {
3036     case CCValAssign::Full:
3037       break;
3038     case CCValAssign::SExt: {
3039       MVT DestVT = VA.getLocVT();
3040       MVT SrcVT = ArgVT;
3041       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3042       if (!ArgReg)
3043         return false;
3044       break;
3045     }
3046     case CCValAssign::AExt:
3047     // Intentional fall-through.
3048     case CCValAssign::ZExt: {
3049       MVT DestVT = VA.getLocVT();
3050       MVT SrcVT = ArgVT;
3051       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3052       if (!ArgReg)
3053         return false;
3054       break;
3055     }
3056     default:
3057       llvm_unreachable("Unknown arg promotion!");
3058     }
3059 
3060     // Now copy/store arg to correct locations.
3061     if (VA.isRegLoc() && !VA.needsCustom()) {
3062       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3063               TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3064       CLI.OutRegs.push_back(VA.getLocReg());
3065     } else if (VA.needsCustom()) {
3066       // FIXME: Handle custom args.
3067       return false;
3068     } else {
3069       assert(VA.isMemLoc() && "Assuming store on stack.");
3070 
3071       // Don't emit stores for undef values.
3072       if (isa<UndefValue>(ArgVal))
3073         continue;
3074 
3075       // Need to store on the stack.
3076       unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3077 
3078       unsigned BEAlign = 0;
3079       if (ArgSize < 8 && !Subtarget->isLittleEndian())
3080         BEAlign = 8 - ArgSize;
3081 
3082       Address Addr;
3083       Addr.setKind(Address::RegBase);
3084       Addr.setReg(AArch64::SP);
3085       Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3086 
3087       Align Alignment = DL.getABITypeAlign(ArgVal->getType());
3088       MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3089           MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3090           MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3091 
3092       if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3093         return false;
3094     }
3095   }
3096   return true;
3097 }
3098 
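/// Finish lowering a call: emit CALLSEQ_END and copy the return values out of
/// their assigned physical registers into consecutive result registers.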
3099 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, unsigned NumBytes) {
3100   CallingConv::ID CC = CLI.CallConv;
3101 
3102   // Issue CALLSEQ_END
3103   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3104   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp))
3105     .addImm(NumBytes).addImm(0);
3106 
3107   // Now the return values.
3108   SmallVector<CCValAssign, 16> RVLocs;
3109   CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3110   CCInfo.AnalyzeCallResult(CLI.Ins, CCAssignFnForCall(CC));
3111 
3112   Register ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
3113   for (unsigned i = 0; i != RVLocs.size(); ++i) {
3114     CCValAssign &VA = RVLocs[i];
3115     MVT CopyVT = VA.getValVT();
3116     unsigned CopyReg = ResultReg + i;
3117 
3118     // TODO: Handle big-endian results
3119     if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3120       return false;
3121 
3122     // Copy each result out of its specified physreg.
3123     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
3124             CopyReg)
3125         .addReg(VA.getLocReg());
3126     CLI.InRegs.push_back(VA.getLocReg());
3127   }
3128 
3129   CLI.ResultReg = ResultReg;
3130   CLI.NumResultRegs = RVLocs.size();
3131 
3132   return true;
3133 }
3134 
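/// FastISel entry point for call lowering. Bails out (returning false so
/// SelectionDAG takes over) for tail calls, varargs, ILP32, Arm64EC, the
/// large code model on non-MachO targets, and argument kinds this fast path
/// does not support.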
3135 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3136   CallingConv::ID CC  = CLI.CallConv;
3137   bool IsTailCall     = CLI.IsTailCall;
3138   bool IsVarArg       = CLI.IsVarArg;
3139   const Value *Callee = CLI.Callee;
3140   MCSymbol *Symbol = CLI.Symbol;
3141 
3142   if (!Callee && !Symbol)
3143     return false;
3144 
3145   // Allow SelectionDAG isel to handle calls to functions like setjmp that need
3146   // a bti instruction following the call.
3147   if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
3148       !Subtarget->noBTIAtReturnTwice() &&
3149       MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
3150     return false;
3151 
3152   // Allow SelectionDAG isel to handle indirect calls with KCFI checks.
3153   if (CLI.CB && CLI.CB->isIndirectCall() &&
3154       CLI.CB->getOperandBundle(LLVMContext::OB_kcfi))
3155     return false;
3156 
3157   // Allow SelectionDAG isel to handle tail calls.
3158   if (IsTailCall)
3159     return false;
3160 
3161   // FIXME: we could and should support this, but for now correctness at -O0 is
3162   // more important.
3163   if (Subtarget->isTargetILP32())
3164     return false;
3165 
3166   CodeModel::Model CM = TM.getCodeModel();
3167   // Only support the small-addressing and large code models.
3168   if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3169     return false;
3170 
3171   // FIXME: Add large code model support for ELF.
3172   if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3173     return false;
3174 
3175   // Let SDISel handle vararg functions.
3176   if (IsVarArg)
3177     return false;
3178 
3179   if (Subtarget->isWindowsArm64EC())
3180     return false;
3181 
3182   for (auto Flag : CLI.OutFlags)
3183     if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3184         Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())
3185       return false;
3186 
3187   // Set up the argument vectors.
3188   SmallVector<MVT, 16> OutVTs;
3189   OutVTs.reserve(CLI.OutVals.size());
3190 
3191   for (auto *Val : CLI.OutVals) {
3192     MVT VT;
3193     if (!isTypeLegal(Val->getType(), VT) &&
3194         !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3195       return false;
3196 
3197     // We don't handle vector parameters yet.
3198     if (VT.isVector() || VT.getSizeInBits() > 64)
3199       return false;
3200 
3201     OutVTs.push_back(VT);
3202   }
3203 
3204   Address Addr;
3205   if (Callee && !computeCallAddress(Callee, Addr))
3206     return false;
3207 
3208   // The weak function target may be zero; in that case we must use indirect
3209   // addressing via a stub on Windows, as the target may be out of range for a
3210   // PC-relative jump.
3211   if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&
3212       Addr.getGlobalValue()->hasExternalWeakLinkage())
3213     return false;
3214 
3215   // Handle the arguments now that we've gotten them.
3216   unsigned NumBytes;
3217   if (!processCallArgs(CLI, OutVTs, NumBytes))
3218     return false;
3219 
3220   const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3221   if (RegInfo->isAnyArgRegReserved(*MF))
3222     RegInfo->emitReservedArgRegCallError(*MF);
3223 
3224   // Issue the call.
3225   MachineInstrBuilder MIB;
3226   if (Subtarget->useSmallAddressing()) {
3227     const MCInstrDesc &II =
3228         TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL);
3229     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II);
3230     if (Symbol)
3231       MIB.addSym(Symbol, 0);
3232     else if (Addr.getGlobalValue())
3233       MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3234     else if (Addr.getReg()) {
3235       Register Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3236       MIB.addReg(Reg);
3237     } else
3238       return false;
3239   } else {
3240     unsigned CallReg = 0;
3241     if (Symbol) {
3242       Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3243       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
3244               ADRPReg)
3245           .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3246 
3247       CallReg = createResultReg(&AArch64::GPR64RegClass);
3248       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3249               TII.get(AArch64::LDRXui), CallReg)
3250           .addReg(ADRPReg)
3251           .addSym(Symbol,
3252                   AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3253     } else if (Addr.getGlobalValue())
3254       CallReg = materializeGV(Addr.getGlobalValue());
3255     else if (Addr.getReg())
3256       CallReg = Addr.getReg();
3257 
3258     if (!CallReg)
3259       return false;
3260 
3261     const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF));
3262     CallReg = constrainOperandRegClass(II, CallReg, 0);
3263     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(CallReg);
3264   }
3265 
3266   // Add implicit physical register uses to the call.
3267   for (auto Reg : CLI.OutRegs)
3268     MIB.addReg(Reg, RegState::Implicit);
3269 
3270   // Add a register mask with the call-preserved registers.
3271   // Proper defs for return values will be added by setPhysRegsDeadExcept().
3272   MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3273 
3274   CLI.Call = MIB;
3275 
3276   // Finish off the call including any return values.
3277   return finishCall(CLI, NumBytes);
3278 }
3279 
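/// Heuristic used to decide whether a memcpy should be inlined: with a known
/// alignment, allow at most four alignment-sized chunks; otherwise allow
/// anything under 32 bytes.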
3280 bool AArch64FastISel::isMemCpySmall(uint64_t Len, MaybeAlign Alignment) {
3281   if (Alignment)
3282     return Len / Alignment->value() <= 4;
3283   else
3284     return Len < 32;
3285 }
3286 
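/// Inline a small memcpy as a sequence of load/store pairs, using the widest
/// integer type that the remaining length and the alignment permit.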
3287 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3288                                          uint64_t Len, MaybeAlign Alignment) {
3289   // Make sure we don't bloat code by inlining very large memcpys.
3290   if (!isMemCpySmall(Len, Alignment))
3291     return false;
3292 
3293   int64_t UnscaledOffset = 0;
3294   Address OrigDest = Dest;
3295   Address OrigSrc = Src;
3296 
3297   while (Len) {
3298     MVT VT;
3299     if (!Alignment || *Alignment >= 8) {
3300       if (Len >= 8)
3301         VT = MVT::i64;
3302       else if (Len >= 4)
3303         VT = MVT::i32;
3304       else if (Len >= 2)
3305         VT = MVT::i16;
3306       else {
3307         VT = MVT::i8;
3308       }
3309     } else {
3310       assert(Alignment && "Alignment is set in this branch");
3311       // Bound based on alignment.
3312       if (Len >= 4 && *Alignment == 4)
3313         VT = MVT::i32;
3314       else if (Len >= 2 && *Alignment == 2)
3315         VT = MVT::i16;
3316       else {
3317         VT = MVT::i8;
3318       }
3319     }
3320 
3321     unsigned ResultReg = emitLoad(VT, VT, Src);
3322     if (!ResultReg)
3323       return false;
3324 
3325     if (!emitStore(VT, ResultReg, Dest))
3326       return false;
3327 
3328     int64_t Size = VT.getSizeInBits() / 8;
3329     Len -= Size;
3330     UnscaledOffset += Size;
3331 
3332     // We need to recompute the unscaled offset for each iteration.
3333     Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3334     Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3335   }
3336 
3337   return true;
3338 }
3339 
3340 /// Check if it is possible to fold the condition from the XALU intrinsic
3341 /// into the user. The condition code will only be updated on success.
3342 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3343                                         const Instruction *I,
3344                                         const Value *Cond) {
3345   if (!isa<ExtractValueInst>(Cond))
3346     return false;
3347 
3348   const auto *EV = cast<ExtractValueInst>(Cond);
3349   if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3350     return false;
3351 
3352   const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3353   MVT RetVT;
3354   const Function *Callee = II->getCalledFunction();
3355   Type *RetTy =
3356   cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3357   if (!isTypeLegal(RetTy, RetVT))
3358     return false;
3359 
3360   if (RetVT != MVT::i32 && RetVT != MVT::i64)
3361     return false;
3362 
3363   const Value *LHS = II->getArgOperand(0);
3364   const Value *RHS = II->getArgOperand(1);
3365 
3366   // Canonicalize immediate to the RHS.
3367   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3368     std::swap(LHS, RHS);
3369 
3370   // Simplify multiplies.
3371   Intrinsic::ID IID = II->getIntrinsicID();
3372   switch (IID) {
3373   default:
3374     break;
3375   case Intrinsic::smul_with_overflow:
3376     if (const auto *C = dyn_cast<ConstantInt>(RHS))
3377       if (C->getValue() == 2)
3378         IID = Intrinsic::sadd_with_overflow;
3379     break;
3380   case Intrinsic::umul_with_overflow:
3381     if (const auto *C = dyn_cast<ConstantInt>(RHS))
3382       if (C->getValue() == 2)
3383         IID = Intrinsic::uadd_with_overflow;
3384     break;
3385   }
3386 
3387   AArch64CC::CondCode TmpCC;
3388   switch (IID) {
3389   default:
3390     return false;
3391   case Intrinsic::sadd_with_overflow:
3392   case Intrinsic::ssub_with_overflow:
3393     TmpCC = AArch64CC::VS;
3394     break;
3395   case Intrinsic::uadd_with_overflow:
3396     TmpCC = AArch64CC::HS;
3397     break;
3398   case Intrinsic::usub_with_overflow:
3399     TmpCC = AArch64CC::LO;
3400     break;
3401   case Intrinsic::smul_with_overflow:
3402   case Intrinsic::umul_with_overflow:
3403     TmpCC = AArch64CC::NE;
3404     break;
3405   }
3406 
3407   // Check if both instructions are in the same basic block.
3408   if (!isValueAvailable(II))
3409     return false;
3410 
3411   // Make sure nothing is in the way between the intrinsic and its user.
3412   BasicBlock::const_iterator Start(I);
3413   BasicBlock::const_iterator End(II);
3414   for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3415     // We only expect extractvalue instructions between the intrinsic and the
3416     // instruction to be selected.
3417     if (!isa<ExtractValueInst>(Itr))
3418       return false;
3419 
3420     // Check that the extractvalue operand comes from the intrinsic.
3421     const auto *EVI = cast<ExtractValueInst>(Itr);
3422     if (EVI->getAggregateOperand() != II)
3423       return false;
3424   }
3425 
3426   CC = TmpCC;
3427   return true;
3428 }
3429 
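/// Lower a handful of intrinsics directly (frameaddress, sponentry, the mem*
/// family, sin/cos/pow libcalls, fabs, sqrt, traps, the *.with.overflow
/// intrinsics and CRC32); everything else falls back by returning false.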
3430 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3431   // FIXME: Handle more intrinsics.
3432   switch (II->getIntrinsicID()) {
3433   default: return false;
3434   case Intrinsic::frameaddress: {
3435     MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3436     MFI.setFrameAddressIsTaken(true);
3437 
3438     const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3439     Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3440     Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3441     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3442             TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3443     // Recursively load frame address
3444     // ldr x0, [fp]
3445     // ldr x0, [x0]
3446     // ldr x0, [x0]
3447     // ...
3448     unsigned DestReg;
3449     unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3450     while (Depth--) {
3451       DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3452                                 SrcReg, 0);
3453       assert(DestReg && "Unexpected LDR instruction emission failure.");
3454       SrcReg = DestReg;
3455     }
3456 
3457     updateValueMap(II, SrcReg);
3458     return true;
3459   }
3460   case Intrinsic::sponentry: {
3461     MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3462 
3463     // SP = FP + Fixed Object + 16
3464     int FI = MFI.CreateFixedObject(4, 0, false);
3465     Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3466     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3467             TII.get(AArch64::ADDXri), ResultReg)
3468             .addFrameIndex(FI)
3469             .addImm(0)
3470             .addImm(0);
3471 
3472     updateValueMap(II, ResultReg);
3473     return true;
3474   }
3475   case Intrinsic::memcpy:
3476   case Intrinsic::memmove: {
3477     const auto *MTI = cast<MemTransferInst>(II);
3478     // Don't handle volatile.
3479     if (MTI->isVolatile())
3480       return false;
3481 
3482     // Disable inlining for memmove before calls to computeAddress. Otherwise,
3483     // we would emit dead code because we don't currently handle memmoves.
3484     bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3485     if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3486       // Small memcpys are common enough that we want to do them without a call
3487       // if possible.
3488       uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3489       MaybeAlign Alignment;
3490       if (MTI->getDestAlign() || MTI->getSourceAlign())
3491         Alignment = std::min(MTI->getDestAlign().valueOrOne(),
3492                              MTI->getSourceAlign().valueOrOne());
3493       if (isMemCpySmall(Len, Alignment)) {
3494         Address Dest, Src;
3495         if (!computeAddress(MTI->getRawDest(), Dest) ||
3496             !computeAddress(MTI->getRawSource(), Src))
3497           return false;
3498         if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3499           return true;
3500       }
3501     }
3502 
3503     if (!MTI->getLength()->getType()->isIntegerTy(64))
3504       return false;
3505 
3506     if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3507       // Fast instruction selection doesn't support the special
3508       // address spaces.
3509       return false;
3510 
3511     const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3512     return lowerCallTo(II, IntrMemName, II->arg_size() - 1);
3513   }
3514   case Intrinsic::memset: {
3515     const MemSetInst *MSI = cast<MemSetInst>(II);
3516     // Don't handle volatile.
3517     if (MSI->isVolatile())
3518       return false;
3519 
3520     if (!MSI->getLength()->getType()->isIntegerTy(64))
3521       return false;
3522 
3523     if (MSI->getDestAddressSpace() > 255)
3524       // Fast instruction selection doesn't support the special
3525       // address spaces.
3526       return false;
3527 
3528     return lowerCallTo(II, "memset", II->arg_size() - 1);
3529   }
3530   case Intrinsic::sin:
3531   case Intrinsic::cos:
3532   case Intrinsic::pow: {
3533     MVT RetVT;
3534     if (!isTypeLegal(II->getType(), RetVT))
3535       return false;
3536 
3537     if (RetVT != MVT::f32 && RetVT != MVT::f64)
3538       return false;
3539 
3540     static const RTLIB::Libcall LibCallTable[3][2] = {
3541       { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3542       { RTLIB::COS_F32, RTLIB::COS_F64 },
3543       { RTLIB::POW_F32, RTLIB::POW_F64 }
3544     };
3545     RTLIB::Libcall LC;
3546     bool Is64Bit = RetVT == MVT::f64;
3547     switch (II->getIntrinsicID()) {
3548     default:
3549       llvm_unreachable("Unexpected intrinsic.");
3550     case Intrinsic::sin:
3551       LC = LibCallTable[0][Is64Bit];
3552       break;
3553     case Intrinsic::cos:
3554       LC = LibCallTable[1][Is64Bit];
3555       break;
3556     case Intrinsic::pow:
3557       LC = LibCallTable[2][Is64Bit];
3558       break;
3559     }
3560 
3561     ArgListTy Args;
3562     Args.reserve(II->arg_size());
3563 
3564     // Populate the argument list.
3565     for (auto &Arg : II->args()) {
3566       ArgListEntry Entry;
3567       Entry.Val = Arg;
3568       Entry.Ty = Arg->getType();
3569       Args.push_back(Entry);
3570     }
3571 
3572     CallLoweringInfo CLI;
3573     MCContext &Ctx = MF->getContext();
3574     CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3575                   TLI.getLibcallName(LC), std::move(Args));
3576     if (!lowerCallTo(CLI))
3577       return false;
3578     updateValueMap(II, CLI.ResultReg);
3579     return true;
3580   }
3581   case Intrinsic::fabs: {
3582     MVT VT;
3583     if (!isTypeLegal(II->getType(), VT))
3584       return false;
3585 
3586     unsigned Opc;
3587     switch (VT.SimpleTy) {
3588     default:
3589       return false;
3590     case MVT::f32:
3591       Opc = AArch64::FABSSr;
3592       break;
3593     case MVT::f64:
3594       Opc = AArch64::FABSDr;
3595       break;
3596     }
3597     Register SrcReg = getRegForValue(II->getOperand(0));
3598     if (!SrcReg)
3599       return false;
3600     Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3601     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
3602       .addReg(SrcReg);
3603     updateValueMap(II, ResultReg);
3604     return true;
3605   }
3606   case Intrinsic::trap:
3607     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3608         .addImm(1);
3609     return true;
3610   case Intrinsic::debugtrap:
3611     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3612         .addImm(0xF000);
3613     return true;
3614 
3615   case Intrinsic::sqrt: {
3616     Type *RetTy = II->getCalledFunction()->getReturnType();
3617 
3618     MVT VT;
3619     if (!isTypeLegal(RetTy, VT))
3620       return false;
3621 
3622     Register Op0Reg = getRegForValue(II->getOperand(0));
3623     if (!Op0Reg)
3624       return false;
3625 
3626     unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg);
3627     if (!ResultReg)
3628       return false;
3629 
3630     updateValueMap(II, ResultReg);
3631     return true;
3632   }
3633   case Intrinsic::sadd_with_overflow:
3634   case Intrinsic::uadd_with_overflow:
3635   case Intrinsic::ssub_with_overflow:
3636   case Intrinsic::usub_with_overflow:
3637   case Intrinsic::smul_with_overflow:
3638   case Intrinsic::umul_with_overflow: {
3639     // This implements the basic lowering of the xalu with overflow intrinsics.
3640     const Function *Callee = II->getCalledFunction();
3641     auto *Ty = cast<StructType>(Callee->getReturnType());
3642     Type *RetTy = Ty->getTypeAtIndex(0U);
3643 
3644     MVT VT;
3645     if (!isTypeLegal(RetTy, VT))
3646       return false;
3647 
3648     if (VT != MVT::i32 && VT != MVT::i64)
3649       return false;
3650 
3651     const Value *LHS = II->getArgOperand(0);
3652     const Value *RHS = II->getArgOperand(1);
3653     // Canonicalize immediate to the RHS.
3654     if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3655       std::swap(LHS, RHS);
3656 
3657     // Simplify multiplies.
3658     Intrinsic::ID IID = II->getIntrinsicID();
3659     switch (IID) {
3660     default:
3661       break;
3662     case Intrinsic::smul_with_overflow:
3663       if (const auto *C = dyn_cast<ConstantInt>(RHS))
3664         if (C->getValue() == 2) {
3665           IID = Intrinsic::sadd_with_overflow;
3666           RHS = LHS;
3667         }
3668       break;
3669     case Intrinsic::umul_with_overflow:
3670       if (const auto *C = dyn_cast<ConstantInt>(RHS))
3671         if (C->getValue() == 2) {
3672           IID = Intrinsic::uadd_with_overflow;
3673           RHS = LHS;
3674         }
3675       break;
3676     }
3677 
3678     unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3679     AArch64CC::CondCode CC = AArch64CC::Invalid;
3680     switch (IID) {
3681     default: llvm_unreachable("Unexpected intrinsic!");
3682     case Intrinsic::sadd_with_overflow:
3683       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3684       CC = AArch64CC::VS;
3685       break;
3686     case Intrinsic::uadd_with_overflow:
3687       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3688       CC = AArch64CC::HS;
3689       break;
3690     case Intrinsic::ssub_with_overflow:
3691       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3692       CC = AArch64CC::VS;
3693       break;
3694     case Intrinsic::usub_with_overflow:
3695       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3696       CC = AArch64CC::LO;
3697       break;
3698     case Intrinsic::smul_with_overflow: {
3699       CC = AArch64CC::NE;
3700       Register LHSReg = getRegForValue(LHS);
3701       if (!LHSReg)
3702         return false;
3703 
3704       Register RHSReg = getRegForValue(RHS);
3705       if (!RHSReg)
3706         return false;
3707 
3708       if (VT == MVT::i32) {
3709         MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg);
3710         Register MulSubReg =
3711             fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3712         // cmp xreg, wreg, sxtw
3713         emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg,
3714                       AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true,
3715                       /*WantResult=*/false);
3716         MulReg = MulSubReg;
3717       } else {
3718         assert(VT == MVT::i64 && "Unexpected value type.");
3719         // LHSReg and RHSReg cannot be killed by this Mul, since they are
3720         // reused in the next instruction.
3721         MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3722         unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg);
3723         emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63,
3724                     /*WantResult=*/false);
3725       }
3726       break;
3727     }
3728     case Intrinsic::umul_with_overflow: {
3729       CC = AArch64CC::NE;
3730       Register LHSReg = getRegForValue(LHS);
3731       if (!LHSReg)
3732         return false;
3733 
3734       Register RHSReg = getRegForValue(RHS);
3735       if (!RHSReg)
3736         return false;
3737 
3738       if (VT == MVT::i32) {
3739         MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg);
3740         // tst xreg, #0xffffffff00000000
3741         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3742                 TII.get(AArch64::ANDSXri), AArch64::XZR)
3743             .addReg(MulReg)
3744             .addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64));
3745         MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3746       } else {
3747         assert(VT == MVT::i64 && "Unexpected value type.");
3748         // LHSReg and RHSReg cannot be killed by this Mul, since they are
3749         // reused in the next instruction.
3750         MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3751         unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg);
3752         emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false);
3753       }
3754       break;
3755     }
3756     }
3757 
3758     if (MulReg) {
3759       ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3760       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3761               TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3762     }
3763 
3764     if (!ResultReg1)
3765       return false;
3766 
3767     ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3768                                   AArch64::WZR, AArch64::WZR,
3769                                   getInvertedCondCode(CC));
3770     (void)ResultReg2;
3771     assert((ResultReg1 + 1) == ResultReg2 &&
3772            "Nonconsecutive result registers.");
3773     updateValueMap(II, ResultReg1, 2);
3774     return true;
3775   }
3776   case Intrinsic::aarch64_crc32b:
3777   case Intrinsic::aarch64_crc32h:
3778   case Intrinsic::aarch64_crc32w:
3779   case Intrinsic::aarch64_crc32x:
3780   case Intrinsic::aarch64_crc32cb:
3781   case Intrinsic::aarch64_crc32ch:
3782   case Intrinsic::aarch64_crc32cw:
3783   case Intrinsic::aarch64_crc32cx: {
3784     if (!Subtarget->hasCRC())
3785       return false;
3786 
3787     unsigned Opc;
3788     switch (II->getIntrinsicID()) {
3789     default:
3790       llvm_unreachable("Unexpected intrinsic!");
3791     case Intrinsic::aarch64_crc32b:
3792       Opc = AArch64::CRC32Brr;
3793       break;
3794     case Intrinsic::aarch64_crc32h:
3795       Opc = AArch64::CRC32Hrr;
3796       break;
3797     case Intrinsic::aarch64_crc32w:
3798       Opc = AArch64::CRC32Wrr;
3799       break;
3800     case Intrinsic::aarch64_crc32x:
3801       Opc = AArch64::CRC32Xrr;
3802       break;
3803     case Intrinsic::aarch64_crc32cb:
3804       Opc = AArch64::CRC32CBrr;
3805       break;
3806     case Intrinsic::aarch64_crc32ch:
3807       Opc = AArch64::CRC32CHrr;
3808       break;
3809     case Intrinsic::aarch64_crc32cw:
3810       Opc = AArch64::CRC32CWrr;
3811       break;
3812     case Intrinsic::aarch64_crc32cx:
3813       Opc = AArch64::CRC32CXrr;
3814       break;
3815     }
3816 
3817     Register LHSReg = getRegForValue(II->getArgOperand(0));
3818     Register RHSReg = getRegForValue(II->getArgOperand(1));
3819     if (!LHSReg || !RHSReg)
3820       return false;
3821 
3822     Register ResultReg =
3823         fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, LHSReg, RHSReg);
3824     updateValueMap(II, ResultReg);
3825     return true;
3826   }
3827   }
3828   return false;
3829 }
3830 
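/// Select a 'ret' instruction. Only a single register return value is
/// handled; i1/i8/i16 values are extended to the ABI return type first when
/// needed.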
3831 bool AArch64FastISel::selectRet(const Instruction *I) {
3832   const ReturnInst *Ret = cast<ReturnInst>(I);
3833   const Function &F = *I->getParent()->getParent();
3834 
3835   if (!FuncInfo.CanLowerReturn)
3836     return false;
3837 
3838   if (F.isVarArg())
3839     return false;
3840 
3841   if (TLI.supportSwiftError() &&
3842       F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3843     return false;
3844 
3845   if (TLI.supportSplitCSR(FuncInfo.MF))
3846     return false;
3847 
3848   // Build a list of return value registers.
3849   SmallVector<unsigned, 4> RetRegs;
3850 
3851   if (Ret->getNumOperands() > 0) {
3852     CallingConv::ID CC = F.getCallingConv();
3853     SmallVector<ISD::OutputArg, 4> Outs;
3854     GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3855 
3856     // Analyze operands of the call, assigning locations to each operand.
3857     SmallVector<CCValAssign, 16> ValLocs;
3858     CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3859     CCInfo.AnalyzeReturn(Outs, RetCC_AArch64_AAPCS);
3860 
3861     // Only handle a single return value for now.
3862     if (ValLocs.size() != 1)
3863       return false;
3864 
3865     CCValAssign &VA = ValLocs[0];
3866     const Value *RV = Ret->getOperand(0);
3867 
3868     // Don't bother handling odd stuff for now.
3869     if ((VA.getLocInfo() != CCValAssign::Full) &&
3870         (VA.getLocInfo() != CCValAssign::BCvt))
3871       return false;
3872 
3873     // Only handle register returns for now.
3874     if (!VA.isRegLoc())
3875       return false;
3876 
3877     Register Reg = getRegForValue(RV);
3878     if (Reg == 0)
3879       return false;
3880 
3881     unsigned SrcReg = Reg + VA.getValNo();
3882     Register DestReg = VA.getLocReg();
3883     // Avoid a cross-class copy. This is very unlikely.
3884     if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3885       return false;
3886 
3887     EVT RVEVT = TLI.getValueType(DL, RV->getType());
3888     if (!RVEVT.isSimple())
3889       return false;
3890 
3891     // Vectors (of > 1 lane) in big endian need tricky handling.
3892     if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
3893         !Subtarget->isLittleEndian())
3894       return false;
3895 
3896     MVT RVVT = RVEVT.getSimpleVT();
3897     if (RVVT == MVT::f128)
3898       return false;
3899 
3900     MVT DestVT = VA.getValVT();
3901     // Special handling for extended integers.
3902     if (RVVT != DestVT) {
3903       if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3904         return false;
3905 
3906       if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3907         return false;
3908 
3909       bool IsZExt = Outs[0].Flags.isZExt();
3910       SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3911       if (SrcReg == 0)
3912         return false;
3913     }
3914 
3915     // "Callee" (i.e. value producer) zero extends pointers at function
3916     // boundary.
3917     if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
3918       SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff);
3919 
3920     // Make the copy.
3921     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3922             TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3923 
3924     // Add register to return instruction.
3925     RetRegs.push_back(VA.getLocReg());
3926   }
3927 
3928   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3929                                     TII.get(AArch64::RET_ReallyLR));
3930   for (unsigned RetReg : RetRegs)
3931     MIB.addReg(RetReg, RegState::Implicit);
3932   return true;
3933 }
3934 
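/// Select a 'trunc' instruction. Truncating from i64 to i1/i8/i16 needs an
/// extract_subreg plus an AND; i64 -> i32 is left to the target-independent
/// fast-isel, and truncation from a 32-bit or smaller source is just a COPY
/// since the high bits are undefined.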
3935 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3936   Type *DestTy = I->getType();
3937   Value *Op = I->getOperand(0);
3938   Type *SrcTy = Op->getType();
3939 
3940   EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3941   EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3942   if (!SrcEVT.isSimple())
3943     return false;
3944   if (!DestEVT.isSimple())
3945     return false;
3946 
3947   MVT SrcVT = SrcEVT.getSimpleVT();
3948   MVT DestVT = DestEVT.getSimpleVT();
3949 
3950   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3951       SrcVT != MVT::i8)
3952     return false;
3953   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3954       DestVT != MVT::i1)
3955     return false;
3956 
3957   Register SrcReg = getRegForValue(Op);
3958   if (!SrcReg)
3959     return false;
3960 
3961   // If we're truncating from i64 to a smaller non-legal type then generate an
3962   // AND. Otherwise, we know the high bits are undefined and a truncate only
3963   // generates a COPY. We cannot mark the source register also as the result
3964   // register, because this can incorrectly transfer the kill flag onto the
3965   // source register.
3966   unsigned ResultReg;
3967   if (SrcVT == MVT::i64) {
3968     uint64_t Mask = 0;
3969     switch (DestVT.SimpleTy) {
3970     default:
3971       // Trunc i64 to i32 is handled by the target-independent fast-isel.
3972       return false;
3973     case MVT::i1:
3974       Mask = 0x1;
3975       break;
3976     case MVT::i8:
3977       Mask = 0xff;
3978       break;
3979     case MVT::i16:
3980       Mask = 0xffff;
3981       break;
3982     }
3983     // Issue an extract_subreg to get the lower 32-bits.
3984     Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg,
3985                                                 AArch64::sub_32);
3986     // Create the AND instruction which performs the actual truncation.
3987     ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask);
3988     assert(ResultReg && "Unexpected AND instruction emission failure.");
3989   } else {
3990     ResultReg = createResultReg(&AArch64::GPR32RegClass);
3991     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3992             TII.get(TargetOpcode::COPY), ResultReg)
3993         .addReg(SrcReg);
3994   }
3995 
3996   updateValueMap(I, ResultReg);
3997   return true;
3998 }
3999 
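/// Extend an i1 value. Zero-extension is an AND with 1 (plus SUBREG_TO_REG
/// for a 64-bit destination); sign-extension uses SBFM and is not supported
/// for i64 destinations.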
4000 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
4001   assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
4002           DestVT == MVT::i64) &&
4003          "Unexpected value type.");
4004   // Handle i8 and i16 as i32.
4005   if (DestVT == MVT::i8 || DestVT == MVT::i16)
4006     DestVT = MVT::i32;
4007 
4008   if (IsZExt) {
4009     unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1);
4010     assert(ResultReg && "Unexpected AND instruction emission failure.");
4011     if (DestVT == MVT::i64) {
4012       // We're ZExt i1 to i64.  The ANDWri Wd, Ws, #1 implicitly clears the
4013       // upper 32 bits.  Emit a SUBREG_TO_REG to extend from Wd to Xd.
4014       Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4015       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4016               TII.get(AArch64::SUBREG_TO_REG), Reg64)
4017           .addImm(0)
4018           .addReg(ResultReg)
4019           .addImm(AArch64::sub_32);
4020       ResultReg = Reg64;
4021     }
4022     return ResultReg;
4023   } else {
4024     if (DestVT == MVT::i64) {
4025       // FIXME: We're SExt i1 to i64.
4026       return 0;
4027     }
4028     return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
4029                             0, 0);
4030   }
4031 }
4032 
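/// Emit a multiply as an MADD with the zero register as the accumulator.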
4033 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4034   unsigned Opc, ZReg;
4035   switch (RetVT.SimpleTy) {
4036   default: return 0;
4037   case MVT::i8:
4038   case MVT::i16:
4039   case MVT::i32:
4040     RetVT = MVT::i32;
4041     Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
4042   case MVT::i64:
4043     Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
4044   }
4045 
4046   const TargetRegisterClass *RC =
4047       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4048   return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg);
4049 }
4050 
4051 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4052   if (RetVT != MVT::i64)
4053     return 0;
4054 
4055   return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
4056                           Op0, Op1, AArch64::XZR);
4057 }
4058 
4059 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4060   if (RetVT != MVT::i64)
4061     return 0;
4062 
4063   return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
4064                           Op0, Op1, AArch64::XZR);
4065 }
4066 
4067 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg,
4068                                      unsigned Op1Reg) {
4069   unsigned Opc = 0;
4070   bool NeedTrunc = false;
4071   uint64_t Mask = 0;
4072   switch (RetVT.SimpleTy) {
4073   default: return 0;
4074   case MVT::i8:  Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff;   break;
4075   case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4076   case MVT::i32: Opc = AArch64::LSLVWr;                                  break;
4077   case MVT::i64: Opc = AArch64::LSLVXr;                                  break;
4078   }
4079 
4080   const TargetRegisterClass *RC =
4081       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4082   if (NeedTrunc)
4083     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4084 
4085   Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4086   if (NeedTrunc)
4087     ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4088   return ResultReg;
4089 }
4090 
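/// Emit a left shift by an immediate amount, folding a pending zero-/sign-
/// extension of the source into a single UBFM/SBFM where the bitfield math
/// allows it (see the worked examples below).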
4091 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4092                                      uint64_t Shift, bool IsZExt) {
4093   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4094          "Unexpected source/return type pair.");
4095   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4096           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4097          "Unexpected source value type.");
4098   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4099           RetVT == MVT::i64) && "Unexpected return value type.");
4100 
4101   bool Is64Bit = (RetVT == MVT::i64);
4102   unsigned RegSize = Is64Bit ? 64 : 32;
4103   unsigned DstBits = RetVT.getSizeInBits();
4104   unsigned SrcBits = SrcVT.getSizeInBits();
4105   const TargetRegisterClass *RC =
4106       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4107 
4108   // Just emit a copy for "zero" shifts.
4109   if (Shift == 0) {
4110     if (RetVT == SrcVT) {
4111       Register ResultReg = createResultReg(RC);
4112       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4113               TII.get(TargetOpcode::COPY), ResultReg)
4114           .addReg(Op0);
4115       return ResultReg;
4116     } else
4117       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4118   }
4119 
4120   // Don't deal with undefined shifts.
4121   if (Shift >= DstBits)
4122     return 0;
4123 
4124   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4125   // {S|U}BFM Wd, Wn, #r, #s
4126   // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4127 
4128   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4129   // %2 = shl i16 %1, 4
4130   // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4131   // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4132   // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4133   // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4134 
4135   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4136   // %2 = shl i16 %1, 8
4137   // Wd<32+7-24,32-24> = Wn<7:0>
4138   // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4139   // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4140   // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4141 
4142   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4143   // %2 = shl i16 %1, 12
4144   // Wd<32+3-20,32-20> = Wn<3:0>
4145   // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4146   // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4147   // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4148 
4149   unsigned ImmR = RegSize - Shift;
4150   // Limit the width to the length of the source type.
4151   unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4152   static const unsigned OpcTable[2][2] = {
4153     {AArch64::SBFMWri, AArch64::SBFMXri},
4154     {AArch64::UBFMWri, AArch64::UBFMXri}
4155   };
4156   unsigned Opc = OpcTable[IsZExt][Is64Bit];
4157   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4158     Register TmpReg = MRI.createVirtualRegister(RC);
4159     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4160             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4161         .addImm(0)
4162         .addReg(Op0)
4163         .addImm(AArch64::sub_32);
4164     Op0 = TmpReg;
4165   }
4166   return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4167 }
4168 
4169 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg,
4170                                      unsigned Op1Reg) {
4171   unsigned Opc = 0;
4172   bool NeedTrunc = false;
4173   uint64_t Mask = 0;
4174   switch (RetVT.SimpleTy) {
4175   default: return 0;
4176   case MVT::i8:  Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff;   break;
4177   case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4178   case MVT::i32: Opc = AArch64::LSRVWr; break;
4179   case MVT::i64: Opc = AArch64::LSRVXr; break;
4180   }
4181 
4182   const TargetRegisterClass *RC =
4183       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4184   if (NeedTrunc) {
4185     Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask);
4186     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4187   }
4188   Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4189   if (NeedTrunc)
4190     ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4191   return ResultReg;
4192 }
4193 
4194 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4195                                      uint64_t Shift, bool IsZExt) {
4196   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4197          "Unexpected source/return type pair.");
4198   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4199           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4200          "Unexpected source value type.");
4201   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4202           RetVT == MVT::i64) && "Unexpected return value type.");
4203 
4204   bool Is64Bit = (RetVT == MVT::i64);
4205   unsigned RegSize = Is64Bit ? 64 : 32;
4206   unsigned DstBits = RetVT.getSizeInBits();
4207   unsigned SrcBits = SrcVT.getSizeInBits();
4208   const TargetRegisterClass *RC =
4209       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4210 
4211   // Just emit a copy for "zero" shifts.
4212   if (Shift == 0) {
4213     if (RetVT == SrcVT) {
4214       Register ResultReg = createResultReg(RC);
4215       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4216               TII.get(TargetOpcode::COPY), ResultReg)
4217       .addReg(Op0);
4218       return ResultReg;
4219     } else
4220       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4221   }
4222 
4223   // Don't deal with undefined shifts.
4224   if (Shift >= DstBits)
4225     return 0;
4226 
4227   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4228   // {S|U}BFM Wd, Wn, #r, #s
4229   // Wd<s-r:0> = Wn<s:r> when r <= s
4230 
4231   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4232   // %2 = lshr i16 %1, 4
4233   // Wd<7-4:0> = Wn<7:4>
4234   // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4235   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4236   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4237 
4238   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4239   // %2 = lshr i16 %1, 8
4240   // Wd<7-7,0> = Wn<7:7>
4241   // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4242   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4243   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4244 
4245   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4246   // %2 = lshr i16 %1, 12
4247   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4248   // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4249   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4250   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4251 
4252   if (Shift >= SrcBits && IsZExt)
4253     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4254 
4255   // It is not possible to fold a sign-extend into the LShr instruction. In this
4256   // case emit a sign-extend.
4257   if (!IsZExt) {
4258     Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4259     if (!Op0)
4260       return 0;
4261     SrcVT = RetVT;
4262     SrcBits = SrcVT.getSizeInBits();
4263     IsZExt = true;
4264   }
4265 
4266   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4267   unsigned ImmS = SrcBits - 1;
4268   static const unsigned OpcTable[2][2] = {
4269     {AArch64::SBFMWri, AArch64::SBFMXri},
4270     {AArch64::UBFMWri, AArch64::UBFMXri}
4271   };
4272   unsigned Opc = OpcTable[IsZExt][Is64Bit];
4273   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4274     Register TmpReg = MRI.createVirtualRegister(RC);
4275     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4276             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4277         .addImm(0)
4278         .addReg(Op0)
4279         .addImm(AArch64::sub_32);
4280     Op0 = TmpReg;
4281   }
4282   return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4283 }
4284 
4285 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg,
4286                                      unsigned Op1Reg) {
4287   unsigned Opc = 0;
4288   bool NeedTrunc = false;
4289   uint64_t Mask = 0;
4290   switch (RetVT.SimpleTy) {
4291   default: return 0;
4292   case MVT::i8:  Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff;   break;
4293   case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4294   case MVT::i32: Opc = AArch64::ASRVWr;                                  break;
4295   case MVT::i64: Opc = AArch64::ASRVXr;                                  break;
4296   }
4297 
4298   const TargetRegisterClass *RC =
4299       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4300   if (NeedTrunc) {
4301     Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
4302     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4303   }
4304   Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4305   if (NeedTrunc)
4306     ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4307   return ResultReg;
4308 }
4309 
4310 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4311                                      uint64_t Shift, bool IsZExt) {
4312   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4313          "Unexpected source/return type pair.");
4314   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4315           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4316          "Unexpected source value type.");
4317   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4318           RetVT == MVT::i64) && "Unexpected return value type.");
4319 
4320   bool Is64Bit = (RetVT == MVT::i64);
4321   unsigned RegSize = Is64Bit ? 64 : 32;
4322   unsigned DstBits = RetVT.getSizeInBits();
4323   unsigned SrcBits = SrcVT.getSizeInBits();
4324   const TargetRegisterClass *RC =
4325       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4326 
4327   // Just emit a copy for "zero" shifts.
4328   if (Shift == 0) {
4329     if (RetVT == SrcVT) {
4330       Register ResultReg = createResultReg(RC);
4331       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4332               TII.get(TargetOpcode::COPY), ResultReg)
4333       .addReg(Op0);
4334       return ResultReg;
4335     } else
4336       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4337   }
4338 
4339   // Don't deal with undefined shifts.
4340   if (Shift >= DstBits)
4341     return 0;
4342 
4343   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4344   // {S|U}BFM Wd, Wn, #r, #s
4345   // Wd<s-r:0> = Wn<s:r> when r <= s
4346 
4347   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4348   // %2 = ashr i16 %1, 4
4349   // Wd<7-4:0> = Wn<7:4>
4350   // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4351   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4352   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4353 
4354   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4355   // %2 = ashr i16 %1, 8
4356   // Wd<7-7,0> = Wn<7:7>
4357   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4358   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4359   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4360 
4361   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4362   // %2 = ashr i16 %1, 12
4363   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4364   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4365   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4366   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4367 
4368   if (Shift >= SrcBits && IsZExt)
4369     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4370 
4371   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4372   unsigned ImmS = SrcBits - 1;
4373   static const unsigned OpcTable[2][2] = {
4374     {AArch64::SBFMWri, AArch64::SBFMXri},
4375     {AArch64::UBFMWri, AArch64::UBFMXri}
4376   };
4377   unsigned Opc = OpcTable[IsZExt][Is64Bit];
4378   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4379     Register TmpReg = MRI.createVirtualRegister(RC);
4380     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4381             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4382         .addImm(0)
4383         .addReg(Op0)
4384         .addImm(AArch64::sub_32);
4385     Op0 = TmpReg;
4386   }
4387   return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4388 }
4389 
unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
                                     bool IsZExt) {
  assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");

  // FastISel does not have plumbing to deal with extensions where the SrcVT or
  // DestVT are odd things, so test to make sure that they are both types we can
  // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
  // bail out to SelectionDAG.
  if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
       (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
      ((SrcVT !=  MVT::i1) && (SrcVT !=  MVT::i8) &&
       (SrcVT !=  MVT::i16) && (SrcVT !=  MVT::i32)))
    return 0;

  unsigned Opc;
  unsigned Imm = 0;

  switch (SrcVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
    return emiti1Ext(SrcReg, DestVT, IsZExt);
  case MVT::i8:
    if (DestVT == MVT::i64)
      Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    else
      Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
    Imm = 7;
    break;
  case MVT::i16:
    if (DestVT == MVT::i64)
      Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    else
      Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
    Imm = 15;
    break;
  case MVT::i32:
    assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
    Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    Imm = 31;
    break;
  }

  // Handle i8 and i16 as i32.
  if (DestVT == MVT::i8 || DestVT == MVT::i16)
    DestVT = MVT::i32;
  else if (DestVT == MVT::i64) {
    Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(AArch64::SUBREG_TO_REG), Src64)
        .addImm(0)
        .addReg(SrcReg)
        .addImm(AArch64::sub_32);
    SrcReg = Src64;
  }

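  // {S|U}BFM Rd, Rn, #0, #Imm copies bits Imm:0 of the source and
  // sign-/zero-extends them into the destination, which implements the
  // requested integer extension directly.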
  const TargetRegisterClass *RC =
      (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm);
}

static bool isZExtLoad(const MachineInstr *LI) {
  switch (LI->getOpcode()) {
  default:
    return false;
  case AArch64::LDURBBi:
  case AArch64::LDURHHi:
  case AArch64::LDURWi:
  case AArch64::LDRBBui:
  case AArch64::LDRHHui:
  case AArch64::LDRWui:
  case AArch64::LDRBBroX:
  case AArch64::LDRHHroX:
  case AArch64::LDRWroX:
  case AArch64::LDRBBroW:
  case AArch64::LDRHHroW:
  case AArch64::LDRWroW:
    return true;
  }
}

static bool isSExtLoad(const MachineInstr *LI) {
  switch (LI->getOpcode()) {
  default:
    return false;
  case AArch64::LDURSBWi:
  case AArch64::LDURSHWi:
  case AArch64::LDURSBXi:
  case AArch64::LDURSHXi:
  case AArch64::LDURSWi:
  case AArch64::LDRSBWui:
  case AArch64::LDRSHWui:
  case AArch64::LDRSBXui:
  case AArch64::LDRSHXui:
  case AArch64::LDRSWui:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroX:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSWroW:
    return true;
  }
}

bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
                                         MVT SrcVT) {
  const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
  if (!LI || !LI->hasOneUse())
    return false;

  // Check if the load instruction has already been selected.
  Register Reg = lookUpRegForValue(LI);
  if (!Reg)
    return false;

  MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
  if (!MI)
    return false;

  // Check if the correct load instruction has been emitted - SelectionDAG might
  // have emitted a zero-extending load, but we need a sign-extending load.
  bool IsZExt = isa<ZExtInst>(I);
  const auto *LoadMI = MI;
  if (LoadMI->getOpcode() == TargetOpcode::COPY &&
      LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
    Register LoadReg = MI->getOperand(1).getReg();
    LoadMI = MRI.getUniqueVRegDef(LoadReg);
    assert(LoadMI && "Expected valid instruction");
  }
  if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
    return false;

  // Nothing to be done.
  if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
    updateValueMap(I, Reg);
    return true;
  }

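  // For a zero-extend to i64 the 32-bit load has already cleared the upper
  // bits, so inserting the W register into an X register via SUBREG_TO_REG is
  // sufficient. For a sign-extend the load already produced a 64-bit value;
  // reuse its X register and delete the now-dead truncating copy.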
  if (IsZExt) {
    Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(AArch64::SUBREG_TO_REG), Reg64)
        .addImm(0)
        .addReg(Reg, getKillRegState(true))
        .addImm(AArch64::sub_32);
    Reg = Reg64;
  } else {
    assert((MI->getOpcode() == TargetOpcode::COPY &&
            MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
           "Expected copy instruction");
    Reg = MI->getOperand(1).getReg();
    MachineBasicBlock::iterator I(MI);
    removeDeadCode(I, std::next(I));
  }
  updateValueMap(I, Reg);
  return true;
}

bool AArch64FastISel::selectIntExt(const Instruction *I) {
  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
         "Unexpected integer extend instruction.");
  MVT RetVT;
  MVT SrcVT;
  if (!isTypeSupported(I->getType(), RetVT))
    return false;

  if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
    return false;

  // Try to optimize already sign-/zero-extended values from load instructions.
  if (optimizeIntExtLoad(I, RetVT, SrcVT))
    return true;

  Register SrcReg = getRegForValue(I->getOperand(0));
  if (!SrcReg)
    return false;

  // Try to optimize already sign-/zero-extended values from function arguments.
  bool IsZExt = isa<ZExtInst>(I);
  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
    if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
      if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
        Register ResultReg = createResultReg(&AArch64::GPR64RegClass);
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                TII.get(AArch64::SUBREG_TO_REG), ResultReg)
            .addImm(0)
            .addReg(SrcReg)
            .addImm(AArch64::sub_32);
        SrcReg = ResultReg;
      }

      updateValueMap(I, SrcReg);
      return true;
    }
  }

  unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
  EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
  if (!DestEVT.isSimple())
    return false;

  MVT DestVT = DestEVT.getSimpleVT();
  if (DestVT != MVT::i64 && DestVT != MVT::i32)
    return false;

  unsigned DivOpc;
  bool Is64bit = (DestVT == MVT::i64);
  switch (ISDOpcode) {
  default:
    return false;
  case ISD::SREM:
    DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
    break;
  case ISD::UREM:
    DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
    break;
  }
  unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
  Register Src0Reg = getRegForValue(I->getOperand(0));
  if (!Src0Reg)
    return false;

  Register Src1Reg = getRegForValue(I->getOperand(1));
  if (!Src1Reg)
    return false;

  const TargetRegisterClass *RC =
      (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  Register QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg);
  assert(QuotReg && "Unexpected DIV instruction emission failure.");
  // The remainder is computed as numerator - (quotient * denominator) using the
  // MSUB instruction.
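  // E.g. 'srem i32 %a, %b' becomes roughly:
  //   sdiv wQ, wA, wB
  //   msub wR, wQ, wB, wA   // wR = wA - wQ * wB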
  Register ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg);
  updateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::selectMul(const Instruction *I) {
  MVT VT;
  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
    return false;

  if (VT.isVector())
    return selectBinaryOp(I, ISD::MUL);

  const Value *Src0 = I->getOperand(0);
  const Value *Src1 = I->getOperand(1);
  if (const auto *C = dyn_cast<ConstantInt>(Src0))
    if (C->getValue().isPowerOf2())
      std::swap(Src0, Src1);

  // Try to simplify to a shift instruction.
  if (const auto *C = dyn_cast<ConstantInt>(Src1))
    if (C->getValue().isPowerOf2()) {
      uint64_t ShiftVal = C->getValue().logBase2();
      MVT SrcVT = VT;
      bool IsZExt = true;
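      // A multiply by 2^n is an LSL by n. If the operand is itself a non-free
      // zero-/sign-extend, shift the narrow source instead and let emitLSL_ri
      // fold the extension into the shift.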
      if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
        if (!isIntExtFree(ZExt)) {
          MVT VT;
          if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
            SrcVT = VT;
            IsZExt = true;
            Src0 = ZExt->getOperand(0);
          }
        }
      } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
        if (!isIntExtFree(SExt)) {
          MVT VT;
          if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
            SrcVT = VT;
            IsZExt = false;
            Src0 = SExt->getOperand(0);
          }
        }
      }

      Register Src0Reg = getRegForValue(Src0);
      if (!Src0Reg)
        return false;

      unsigned ResultReg =
          emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt);

      if (ResultReg) {
        updateValueMap(I, ResultReg);
        return true;
      }
    }

  Register Src0Reg = getRegForValue(I->getOperand(0));
  if (!Src0Reg)
    return false;

  Register Src1Reg = getRegForValue(I->getOperand(1));
  if (!Src1Reg)
    return false;

  unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg);

  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::selectShift(const Instruction *I) {
  MVT RetVT;
  if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
    return false;

  if (RetVT.isVector())
    return selectOperator(I, I->getOpcode());

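  // For constant shift amounts, try to fold a preceding zero-/sign-extend of
  // the shifted value into the immediate-form shift (emit{LSL|LSR|ASR}_ri).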
  if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
    unsigned ResultReg = 0;
    uint64_t ShiftVal = C->getZExtValue();
    MVT SrcVT = RetVT;
    bool IsZExt = I->getOpcode() != Instruction::AShr;
    const Value *Op0 = I->getOperand(0);
    if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
      if (!isIntExtFree(ZExt)) {
        MVT TmpVT;
        if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
          SrcVT = TmpVT;
          IsZExt = true;
          Op0 = ZExt->getOperand(0);
        }
      }
    } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
      if (!isIntExtFree(SExt)) {
        MVT TmpVT;
        if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
          SrcVT = TmpVT;
          IsZExt = false;
          Op0 = SExt->getOperand(0);
        }
      }
    }

    Register Op0Reg = getRegForValue(Op0);
    if (!Op0Reg)
      return false;

    switch (I->getOpcode()) {
    default: llvm_unreachable("Unexpected instruction.");
    case Instruction::Shl:
      ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
      break;
    case Instruction::AShr:
      ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
      break;
    case Instruction::LShr:
      ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
      break;
    }
    if (!ResultReg)
      return false;

    updateValueMap(I, ResultReg);
    return true;
  }

  Register Op0Reg = getRegForValue(I->getOperand(0));
  if (!Op0Reg)
    return false;

  Register Op1Reg = getRegForValue(I->getOperand(1));
  if (!Op1Reg)
    return false;

  unsigned ResultReg = 0;
  switch (I->getOpcode()) {
  default: llvm_unreachable("Unexpected instruction.");
  case Instruction::Shl:
    ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg);
    break;
  case Instruction::AShr:
    ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg);
    break;
  case Instruction::LShr:
    ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg);
    break;
  }

  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::selectBitCast(const Instruction *I) {
  MVT RetVT, SrcVT;

  if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
    return false;
  if (!isTypeLegal(I->getType(), RetVT))
    return false;

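  // Only same-width int<->float bitcasts are handled here; they map directly
  // onto an FMOV between a GPR and an FPR.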
  unsigned Opc;
  if (RetVT == MVT::f32 && SrcVT == MVT::i32)
    Opc = AArch64::FMOVWSr;
  else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
    Opc = AArch64::FMOVXDr;
  else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
    Opc = AArch64::FMOVSWr;
  else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
    Opc = AArch64::FMOVDXr;
  else
    return false;

  const TargetRegisterClass *RC = nullptr;
  switch (RetVT.SimpleTy) {
  default: llvm_unreachable("Unexpected value type.");
  case MVT::i32: RC = &AArch64::GPR32RegClass; break;
  case MVT::i64: RC = &AArch64::GPR64RegClass; break;
  case MVT::f32: RC = &AArch64::FPR32RegClass; break;
  case MVT::f64: RC = &AArch64::FPR64RegClass; break;
  }
  Register Op0Reg = getRegForValue(I->getOperand(0));
  if (!Op0Reg)
    return false;

  Register ResultReg = fastEmitInst_r(Opc, RC, Op0Reg);
  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::selectFRem(const Instruction *I) {
  MVT RetVT;
  if (!isTypeLegal(I->getType(), RetVT))
    return false;

  RTLIB::Libcall LC;
  switch (RetVT.SimpleTy) {
  default:
    return false;
  case MVT::f32:
    LC = RTLIB::REM_F32;
    break;
  case MVT::f64:
    LC = RTLIB::REM_F64;
    break;
  }

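  // There is no AArch64 instruction for frem, so lower it to a runtime
  // library call (fmodf/fmod by default).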
  ArgListTy Args;
  Args.reserve(I->getNumOperands());

  // Populate the argument list.
  for (auto &Arg : I->operands()) {
    ArgListEntry Entry;
    Entry.Val = Arg;
    Entry.Ty = Arg->getType();
    Args.push_back(Entry);
  }

  CallLoweringInfo CLI;
  MCContext &Ctx = MF->getContext();
  CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
                TLI.getLibcallName(LC), std::move(Args));
  if (!lowerCallTo(CLI))
    return false;
  updateValueMap(I, CLI.ResultReg);
  return true;
}

bool AArch64FastISel::selectSDiv(const Instruction *I) {
  MVT VT;
  if (!isTypeLegal(I->getType(), VT))
    return false;

  if (!isa<ConstantInt>(I->getOperand(1)))
    return selectBinaryOp(I, ISD::SDIV);

  const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
  if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
      !(C.isPowerOf2() || C.isNegatedPowerOf2()))
    return selectBinaryOp(I, ISD::SDIV);

  unsigned Lg2 = C.countr_zero();
  Register Src0Reg = getRegForValue(I->getOperand(0));
  if (!Src0Reg)
    return false;

  if (cast<BinaryOperator>(I)->isExact()) {
    unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2);
    if (!ResultReg)
      return false;
    updateValueMap(I, ResultReg);
    return true;
  }

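  // Round the division toward zero: add (2^n - 1) to the dividend if it is
  // negative, then shift. E.g. 'sdiv i32 %x, 8' becomes roughly:
  //   add  w1, w0, #7
  //   cmp  w0, #0
  //   csel w1, w1, w0, lt
  //   asr  w0, w1, #3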
  int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
  unsigned AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne);
  if (!AddReg)
    return false;

  // (Src0 < 0) ? Pow2 - 1 : 0;
  if (!emitICmp_ri(VT, Src0Reg, 0))
    return false;

  unsigned SelectOpc;
  const TargetRegisterClass *RC;
  if (VT == MVT::i64) {
    SelectOpc = AArch64::CSELXr;
    RC = &AArch64::GPR64RegClass;
  } else {
    SelectOpc = AArch64::CSELWr;
    RC = &AArch64::GPR32RegClass;
  }
  Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg,
                                        AArch64CC::LT);
  if (!SelectReg)
    return false;

  // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
  // negate the result.
  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
  unsigned ResultReg;
  if (C.isNegative())
    ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg,
                              AArch64_AM::ASR, Lg2);
  else
    ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2);

  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

/// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
/// have to duplicate it for AArch64, because otherwise we would fail during the
/// sign-extend emission.
unsigned AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
  Register IdxN = getRegForValue(Idx);
  if (IdxN == 0)
    // Unhandled operand. Halt "fast" selection and bail.
    return 0;

  // If the index is smaller or larger than intptr_t, truncate or extend it.
  MVT PtrVT = TLI.getPointerTy(DL);
  EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
  if (IdxVT.bitsLT(PtrVT)) {
    IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
  } else if (IdxVT.bitsGT(PtrVT))
    llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
  return IdxN;
}

/// This is mostly a copy of the existing FastISel GEP code, but we have to
/// duplicate it for AArch64, because otherwise we would bail out even for
/// simple cases. This is because the standard fastEmit functions don't cover
/// MUL at all and ADD is lowered very inefficiently.
bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
  if (Subtarget->isTargetILP32())
    return false;

  Register N = getRegForValue(I->getOperand(0));
  if (!N)
    return false;

  // Keep a running tab of the total offset to coalesce multiple N = N + Offset
  // into a single N = N + TotalOffset.
  uint64_t TotalOffs = 0;
  MVT VT = TLI.getPointerTy(DL);
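  // E.g. 'getelementptr i32, ptr %p, i64 %i' computes N = %p + %i * 4, while
  // constant indices are simply accumulated into TotalOffs.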
  for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
       GTI != E; ++GTI) {
    const Value *Idx = GTI.getOperand();
    if (auto *StTy = GTI.getStructTypeOrNull()) {
      unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
      // N = N + Offset
      if (Field)
        TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
    } else {
      // If this is a constant subscript, handle it quickly.
      if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
        if (CI->isZero())
          continue;
        // N = N + Offset
        TotalOffs += GTI.getSequentialElementStride(DL) *
                     cast<ConstantInt>(CI)->getSExtValue();
        continue;
      }
      if (TotalOffs) {
        N = emitAdd_ri_(VT, N, TotalOffs);
        if (!N)
          return false;
        TotalOffs = 0;
      }

      // N = N + Idx * ElementSize;
      uint64_t ElementSize = GTI.getSequentialElementStride(DL);
      unsigned IdxN = getRegForGEPIndex(Idx);
      if (!IdxN)
        return false;

      if (ElementSize != 1) {
        unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
        if (!C)
          return false;
        IdxN = emitMul_rr(VT, IdxN, C);
        if (!IdxN)
          return false;
      }
      N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
      if (!N)
        return false;
    }
  }
  if (TotalOffs) {
    N = emitAdd_ri_(VT, N, TotalOffs);
    if (!N)
      return false;
  }
  updateValueMap(I, N);
  return true;
}

bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
  assert(TM.getOptLevel() == CodeGenOptLevel::None &&
         "cmpxchg survived AtomicExpand at optlevel > -O0");

  auto *RetPairTy = cast<StructType>(I->getType());
  Type *RetTy = RetPairTy->getTypeAtIndex(0U);
  assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
         "cmpxchg has a non-i1 status result");

  MVT VT;
  if (!isTypeLegal(RetTy, VT))
    return false;

  const TargetRegisterClass *ResRC;
  unsigned Opc, CmpOpc;
  // This only supports i32/i64, because i8/i16 aren't legal, and the generic
  // extractvalue selection doesn't support that.
  if (VT == MVT::i32) {
    Opc = AArch64::CMP_SWAP_32;
    CmpOpc = AArch64::SUBSWrs;
    ResRC = &AArch64::GPR32RegClass;
  } else if (VT == MVT::i64) {
    Opc = AArch64::CMP_SWAP_64;
    CmpOpc = AArch64::SUBSXrs;
    ResRC = &AArch64::GPR64RegClass;
  } else {
    return false;
  }

  const MCInstrDesc &II = TII.get(Opc);

  const Register AddrReg = constrainOperandRegClass(
      II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
  const Register DesiredReg = constrainOperandRegClass(
      II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
  const Register NewReg = constrainOperandRegClass(
      II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);

  const Register ResultReg1 = createResultReg(ResRC);
  const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
  const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass);

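  // Emit the CMP_SWAP pseudo, then compare the loaded value against the
  // expected one and materialize the i1 success flag with a CSINC (1 if the
  // values were equal, 0 otherwise).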
  // FIXME: MachineMemOperand doesn't support cmpxchg yet.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
      .addDef(ResultReg1)
      .addDef(ScratchReg)
      .addUse(AddrReg)
      .addUse(DesiredReg)
      .addUse(NewReg);

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc))
      .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
      .addUse(ResultReg1)
      .addUse(DesiredReg)
      .addImm(0);

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr))
      .addDef(ResultReg2)
      .addUse(AArch64::WZR)
      .addUse(AArch64::WZR)
      .addImm(AArch64CC::NE);

  assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
  updateValueMap(I, ResultReg1, 2);
  return true;
}

bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
  if (TLI.fallBackToDAGISel(*I))
    return false;
  switch (I->getOpcode()) {
  default:
    break;
  case Instruction::Add:
  case Instruction::Sub:
    return selectAddSub(I);
  case Instruction::Mul:
    return selectMul(I);
  case Instruction::SDiv:
    return selectSDiv(I);
  case Instruction::SRem:
    if (!selectBinaryOp(I, ISD::SREM))
      return selectRem(I, ISD::SREM);
    return true;
  case Instruction::URem:
    if (!selectBinaryOp(I, ISD::UREM))
      return selectRem(I, ISD::UREM);
    return true;
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return selectShift(I);
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    return selectLogicalOp(I);
  case Instruction::Br:
    return selectBranch(I);
  case Instruction::IndirectBr:
    return selectIndirectBr(I);
  case Instruction::BitCast:
    if (!FastISel::selectBitCast(I))
      return selectBitCast(I);
    return true;
  case Instruction::FPToSI:
    if (!selectCast(I, ISD::FP_TO_SINT))
      return selectFPToInt(I, /*Signed=*/true);
    return true;
  case Instruction::FPToUI:
    return selectFPToInt(I, /*Signed=*/false);
  case Instruction::ZExt:
  case Instruction::SExt:
    return selectIntExt(I);
  case Instruction::Trunc:
    if (!selectCast(I, ISD::TRUNCATE))
      return selectTrunc(I);
    return true;
  case Instruction::FPExt:
    return selectFPExt(I);
  case Instruction::FPTrunc:
    return selectFPTrunc(I);
  case Instruction::SIToFP:
    if (!selectCast(I, ISD::SINT_TO_FP))
      return selectIntToFP(I, /*Signed=*/true);
    return true;
  case Instruction::UIToFP:
    return selectIntToFP(I, /*Signed=*/false);
  case Instruction::Load:
    return selectLoad(I);
  case Instruction::Store:
    return selectStore(I);
  case Instruction::FCmp:
  case Instruction::ICmp:
    return selectCmp(I);
  case Instruction::Select:
    return selectSelect(I);
  case Instruction::Ret:
    return selectRet(I);
  case Instruction::FRem:
    return selectFRem(I);
  case Instruction::GetElementPtr:
    return selectGetElementPtr(I);
  case Instruction::AtomicCmpXchg:
    return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
  }

  // Fall back to target-independent instruction selection.
  return selectOperator(I, I->getOpcode());
}

FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
                                        const TargetLibraryInfo *LibInfo) {

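  // Don't use FastISel for functions with SME ZA state or a streaming /
  // streaming-compatible interface; let SelectionDAG handle them instead.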
  SMEAttrs CallerAttrs(*FuncInfo.Fn);
  if (CallerAttrs.hasZAState() || CallerAttrs.hasStreamingInterfaceOrBody() ||
      CallerAttrs.hasStreamingCompatibleInterface())
    return nullptr;
  return new AArch64FastISel(FuncInfo, LibInfo);
}