//===- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the AArch64-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// AArch64GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "AArch64CallingConvention.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;

namespace {

class AArch64FastISel final : public FastISel {
  class Address {
  public:
    using BaseKind = enum {
      RegBase,
      FrameIndexBase
    };

  private:
    BaseKind Kind = RegBase;
    AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
    union {
      unsigned Reg;
      int FI;
    } Base;
    unsigned OffsetReg = 0;
    unsigned Shift = 0;
    int64_t Offset = 0;
    const GlobalValue *GV = nullptr;

  public:
    Address() { Base.Reg = 0; }

    void setKind(BaseKind K) { Kind = K; }
    BaseKind getKind() const { return Kind; }
    void setExtendType(AArch64_AM::ShiftExtendType E) {
      ExtType = E;
    }
    AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
    bool isRegBase() const { return Kind == RegBase; }
    bool isFIBase() const { return Kind == FrameIndexBase; }

    void setReg(unsigned Reg) {
      assert(isRegBase() && "Invalid base register access!");
      Base.Reg = Reg;
    }

    unsigned getReg() const {
      assert(isRegBase() && "Invalid base register access!");
      return Base.Reg;
    }

    void setOffsetReg(unsigned Reg) {
      OffsetReg = Reg;
    }

    unsigned getOffsetReg() const {
      return OffsetReg;
    }

    void setFI(unsigned FI) {
      assert(isFIBase() && "Invalid base frame index access!");
      Base.FI = FI;
    }

    unsigned getFI() const {
      assert(isFIBase() && "Invalid base frame index access!");
      return Base.FI;
    }

    void setOffset(int64_t O) { Offset = O; }
    int64_t getOffset() { return Offset; }
    void setShift(unsigned S) { Shift = S; }
    unsigned getShift() { return Shift; }

    void setGlobalValue(const GlobalValue *G) { GV = G; }
    const GlobalValue *getGlobalValue() { return GV; }
  };

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;
  LLVMContext *Context;

  bool fastLowerArguments() override;
  bool fastLowerCall(CallLoweringInfo &CLI) override;
  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;

private:
  // Selection routines.
  bool selectAddSub(const Instruction *I);
  bool selectLogicalOp(const Instruction *I);
  bool selectLoad(const Instruction *I);
  bool selectStore(const Instruction *I);
  bool selectBranch(const Instruction *I);
  bool selectIndirectBr(const Instruction *I);
  bool selectCmp(const Instruction *I);
  bool selectSelect(const Instruction *I);
  bool selectFPExt(const Instruction *I);
  bool selectFPTrunc(const Instruction *I);
  bool selectFPToInt(const Instruction *I, bool Signed);
  bool selectIntToFP(const Instruction *I, bool Signed);
  bool selectRem(const Instruction *I, unsigned ISDOpcode);
  bool selectRet(const Instruction *I);
  bool selectTrunc(const Instruction *I);
  bool selectIntExt(const Instruction *I);
  bool selectMul(const Instruction *I);
  bool selectShift(const Instruction *I);
  bool selectBitCast(const Instruction *I);
  bool selectFRem(const Instruction *I);
  bool selectSDiv(const Instruction *I);
  bool selectGetElementPtr(const Instruction *I);
  bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);

  // Utility helper routines.
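  // Shared helpers for type checks and address folding used by the selection
  // routines above.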
  bool isTypeLegal(Type *Ty, MVT &VT);
  bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
  bool isValueAvailable(const Value *V) const;
  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
  bool computeCallAddress(const Value *V, Address &Addr);
  bool simplifyAddress(Address &Addr, MVT VT);
  void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
                            MachineMemOperand::Flags Flags,
                            unsigned ScaleFactor, MachineMemOperand *MMO);
  bool isMemCpySmall(uint64_t Len, unsigned Alignment);
  bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
                          unsigned Alignment);
  bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
                         const Value *Cond);
  bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
  bool optimizeSelect(const SelectInst *SI);
  std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);

  // Emit helper routines.
  unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                      const Value *RHS, bool SetFlags = false,
                      bool WantResult = true, bool IsZExt = false);
  unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                         bool SetFlags = false, bool WantResult = true);
  unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                         AArch64_AM::ShiftExtendType ShiftType,
                         uint64_t ShiftImm, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                         AArch64_AM::ShiftExtendType ExtType,
                         uint64_t ShiftImm, bool SetFlags = false,
                         bool WantResult = true);

  // Emit functions.
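  // The unsigned emitters return the result register (0 on failure); the bool
  // emitters report whether the compare or store could be emitted.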
  bool emitCompareAndBranch(const BranchInst *BI);
  bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
  bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
  bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
  bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
  unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
                    MachineMemOperand *MMO = nullptr);
  bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
                 MachineMemOperand *MMO = nullptr);
  bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
                        MachineMemOperand *MMO = nullptr);
  unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
                   bool SetFlags = false, bool WantResult = true,
                   bool IsZExt = false);
  unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
  unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
                   bool SetFlags = false, bool WantResult = true,
                   bool IsZExt = false);
  unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
                       unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
  unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
                       unsigned RHSReg, bool RHSIsKill,
                       AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
                       bool WantResult = true);
  unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
                         const Value *RHS);
  unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
                            bool LHSIsKill, uint64_t Imm);
  unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
                            bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                            uint64_t ShiftImm);
  unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
  unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                      unsigned Op1, bool Op1IsKill);
  unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                        unsigned Op1, bool Op1IsKill);
  unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                        unsigned Op1, bool Op1IsKill);
  unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                      unsigned Op1Reg, bool Op1IsKill);
  unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
                      uint64_t Imm, bool IsZExt = true);
  unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                      unsigned Op1Reg, bool Op1IsKill);
  unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
                      uint64_t Imm, bool IsZExt = true);
  unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                      unsigned Op1Reg, bool Op1IsKill);
  unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
                      uint64_t Imm, bool IsZExt = false);

  unsigned materializeInt(const ConstantInt *CI, MVT VT);
  unsigned materializeFP(const ConstantFP *CFP, MVT VT);
  unsigned materializeGV(const GlobalValue *GV);

  // Call handling routines.
private:
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
  bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
                       unsigned &NumBytes);
  bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);

public:
  // Backend specific FastISel code.
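  // Overrides called by the target-independent FastISel code to materialize
  // values and select instructions.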
  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
  unsigned fastMaterializeConstant(const Constant *C) override;
  unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;

  explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
                           const TargetLibraryInfo *LibInfo)
      : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
    Subtarget =
        &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
    Context = &FuncInfo.Fn->getContext();
  }

  bool fastSelectInstruction(const Instruction *I) override;

#include "AArch64GenFastISel.inc"
};

} // end anonymous namespace

/// Check if the sign-/zero-extend will be a noop.
static bool isIntExtFree(const Instruction *I) {
  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
         "Unexpected integer extend instruction.");
  assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
         "Unexpected value type.");
  bool IsZExt = isa<ZExtInst>(I);

  if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
    if (LI->hasOneUse())
      return true;

  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
    if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
      return true;

  return false;
}

/// Determine the implicit scale factor that is applied by a memory
/// operation for a given value type.
static unsigned getImplicitScaleFactor(MVT VT) {
  switch (VT.SimpleTy) {
  default:
    return 0; // invalid
  case MVT::i1: // fall-through
  case MVT::i8:
    return 1;
  case MVT::i16:
    return 2;
  case MVT::i32: // fall-through
  case MVT::f32:
    return 4;
  case MVT::i64: // fall-through
  case MVT::f64:
    return 8;
  }
}

CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
  if (CC == CallingConv::WebKit_JS)
    return CC_AArch64_WebKit_JS;
  if (CC == CallingConv::GHC)
    return CC_AArch64_GHC;
  if (CC == CallingConv::CFGuard_Check)
    return CC_AArch64_Win64_CFGuard_Check;
  return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
}

unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
  assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
         "Alloca should always return a pointer.");

  // Don't handle dynamic allocas.
  if (!FuncInfo.StaticAllocaMap.count(AI))
    return 0;

  DenseMap<const AllocaInst *, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(AI);

  if (SI != FuncInfo.StaticAllocaMap.end()) {
    unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
            ResultReg)
        .addFrameIndex(SI->second)
        .addImm(0)
        .addImm(0);
    return ResultReg;
  }

  return 0;
}

unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
  if (VT > MVT::i64)
    return 0;

  if (!CI->isZero())
    return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());

  // Create a copy from the zero register to materialize a "0" value.
  const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
                                                   : &AArch64::GPR32RegClass;
  unsigned ZeroReg = (VT == MVT::i64) ?
      AArch64::XZR : AArch64::WZR;
  unsigned ResultReg = createResultReg(RC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
          ResultReg).addReg(ZeroReg, getKillRegState(true));
  return ResultReg;
}

unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
  // Positive zero (+0.0) has to be materialized with a fmov from the zero
  // register, because the immediate version of fmov cannot encode zero.
  if (CFP->isNullValue())
    return fastMaterializeFloatZero(CFP);

  if (VT != MVT::f32 && VT != MVT::f64)
    return 0;

  const APFloat Val = CFP->getValueAPF();
  bool Is64Bit = (VT == MVT::f64);
  // This checks to see if we can use FMOV instructions to materialize
  // a constant, otherwise we have to materialize via the constant pool.
  int Imm =
      Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
  if (Imm != -1) {
    unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
    return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
  }

  // For the MachO large code model materialize the FP constant in code.
  if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
    unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
    const TargetRegisterClass *RC = Is64Bit ?
        &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

    unsigned TmpReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
        .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());

    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(TmpReg, getKillRegState(true));

    return ResultReg;
  }

  // Materialize via constant pool. MachineConstantPool wants an explicit
  // alignment.
  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
  if (Align == 0)
    Align = DL.getTypeAllocSize(CFP->getType());

  unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
          ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);

  unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
      .addReg(ADRPReg)
      .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
  return ResultReg;
}

unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
  // We can't handle thread-local variables quickly yet.
  if (GV->isThreadLocal())
    return 0;

  // MachO still uses GOT for large code-model accesses, but ELF requires
  // movz/movk sequences, which FastISel doesn't handle yet.
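  // Returning 0 rejects the global here and leaves it to the normal ISel path
  // to materialize.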
  if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
    return 0;

  unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);

  EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
  if (!DestEVT.isSimple())
    return 0;

  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
  unsigned ResultReg;

  if (OpFlags & AArch64II::MO_GOT) {
    // ADRP + LDRX
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
            ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);

    unsigned LdrOpc;
    if (Subtarget->isTargetILP32()) {
      ResultReg = createResultReg(&AArch64::GPR32RegClass);
      LdrOpc = AArch64::LDRWui;
    } else {
      ResultReg = createResultReg(&AArch64::GPR64RegClass);
      LdrOpc = AArch64::LDRXui;
    }
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(LdrOpc),
            ResultReg)
        .addReg(ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
                          AArch64II::MO_NC | OpFlags);
    if (!Subtarget->isTargetILP32())
      return ResultReg;

    // LDRWui produces a 32-bit register, but pointers in-register are 64-bits
    // so we must extend the result on ILP32.
    unsigned Result64 = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::SUBREG_TO_REG))
        .addDef(Result64)
        .addImm(0)
        .addReg(ResultReg, RegState::Kill)
        .addImm(AArch64::sub_32);
    return Result64;
  } else {
    // ADRP + ADDX
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
            ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);

    ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
            ResultReg)
        .addReg(ADRPReg)
        .addGlobalAddress(GV, 0,
                          AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
        .addImm(0);
  }
  return ResultReg;
}

unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
  EVT CEVT = TLI.getValueType(DL, C->getType(), true);

  // Only handle simple types.
  if (!CEVT.isSimple())
    return 0;
  MVT VT = CEVT.getSimpleVT();
  // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
  // 'null' pointers need to have a somewhat special treatment.
  if (const auto *CPN = dyn_cast<ConstantPointerNull>(C)) {
    (void)CPN;
    assert(CPN->getType()->getPointerAddressSpace() == 0 &&
           "Unexpected address space");
    assert(VT == MVT::i64 && "Expected 64-bit pointers");
    return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
  }

  if (const auto *CI = dyn_cast<ConstantInt>(C))
    return materializeInt(CI, VT);
  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    return materializeFP(CFP, VT);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    return materializeGV(GV);

  return 0;
}

unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
  assert(CFP->isNullValue() &&
         "Floating-point constant is not a positive zero.");
  MVT VT;
  if (!isTypeLegal(CFP->getType(), VT))
    return 0;

  if (VT != MVT::f32 && VT != MVT::f64)
    return 0;

  bool Is64Bit = (VT == MVT::f64);
  unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
  unsigned Opc = Is64Bit ?
      AArch64::FMOVXDr : AArch64::FMOVWSr;
  return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
}

/// Check if the multiply is by a power-of-2 constant.
static bool isMulPowOf2(const Value *I) {
  if (const auto *MI = dyn_cast<MulOperator>(I)) {
    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
      if (C->getValue().isPowerOf2())
        return true;
    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
      if (C->getValue().isPowerOf2())
        return true;
  }
  return false;
}

// Computes the address to get to an object.
bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
{
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block, otherwise it may not have a virtual register assigned.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
  default:
    break;
  case Instruction::BitCast:
    // Look through bitcasts.
    return computeAddress(U->getOperand(0), Addr, Ty);

  case Instruction::IntToPtr:
    // Look past no-op inttoptrs.
    if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
        TLI.getPointerTy(DL))
      return computeAddress(U->getOperand(0), Addr, Ty);
    break;

  case Instruction::PtrToInt:
    // Look past no-op ptrtoints.
    if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
      return computeAddress(U->getOperand(0), Addr, Ty);
    break;

  case Instruction::GetElementPtr: {
    Address SavedAddr = Addr;
    uint64_t TmpOffset = Addr.getOffset();

    // Iterate through the GEP folding the constants into offsets where
    // we can.
    for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
         GTI != E; ++GTI) {
      const Value *Op = GTI.getOperand();
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        const StructLayout *SL = DL.getStructLayout(STy);
        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
        TmpOffset += SL->getElementOffset(Idx);
      } else {
        uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
        while (true) {
          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
            // Constant-offset addressing.
            TmpOffset += CI->getSExtValue() * S;
            break;
          }
          if (canFoldAddIntoGEP(U, Op)) {
            // A compatible add with a constant operand. Fold the constant.
            ConstantInt *CI =
                cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
            TmpOffset += CI->getSExtValue() * S;
            // Iterate on the other operand.
            Op = cast<AddOperator>(Op)->getOperand(0);
            continue;
          }
          // Unsupported
          goto unsupported_gep;
        }
      }
    }

    // Try to grab the base operand now.
    Addr.setOffset(TmpOffset);
    if (computeAddress(U->getOperand(0), Addr, Ty))
      return true;

    // We failed, restore everything and try the other options.
    Addr = SavedAddr;

  unsupported_gep:
    break;
  }
  case Instruction::Alloca: {
    const AllocaInst *AI = cast<AllocaInst>(Obj);
    DenseMap<const AllocaInst *, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(AI);
    if (SI != FuncInfo.StaticAllocaMap.end()) {
      Addr.setKind(Address::FrameIndexBase);
      Addr.setFI(SI->second);
      return true;
    }
    break;
  }
  case Instruction::Add: {
    // Adds of constants are common and easy enough.
    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    if (isa<ConstantInt>(LHS))
      std::swap(LHS, RHS);

    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
      Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
      return computeAddress(LHS, Addr, Ty);
    }

    Address Backup = Addr;
    if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
      return true;
    Addr = Backup;

    break;
  }
  case Instruction::Sub: {
    // Subs of constants are common and easy enough.
    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
      Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
      return computeAddress(LHS, Addr, Ty);
    }
    break;
  }
  case Instruction::Shl: {
    if (Addr.getOffsetReg())
      break;

    const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
    if (!CI)
      break;

    unsigned Val = CI->getZExtValue();
    if (Val < 1 || Val > 3)
      break;

    uint64_t NumBytes = 0;
    if (Ty && Ty->isSized()) {
      uint64_t NumBits = DL.getTypeSizeInBits(Ty);
      NumBytes = NumBits / 8;
      if (!isPowerOf2_64(NumBits))
        NumBytes = 0;
    }

    if (NumBytes != (1ULL << Val))
      break;

    Addr.setShift(Val);
    Addr.setExtendType(AArch64_AM::LSL);

    const Value *Src = U->getOperand(0);
    if (const auto *I = dyn_cast<Instruction>(Src)) {
      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
        // Fold the zext or sext when it won't become a noop.
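        // Use the UXTW/SXTW register-offset form and take the 32-bit source
        // of the extend as the offset register.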
        if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
          if (!isIntExtFree(ZE) &&
              ZE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::UXTW);
            Src = ZE->getOperand(0);
          }
        } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
          if (!isIntExtFree(SE) &&
              SE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::SXTW);
            Src = SE->getOperand(0);
          }
        }
      }
    }

    if (const auto *AI = dyn_cast<BinaryOperator>(Src))
      if (AI->getOpcode() == Instruction::And) {
        const Value *LHS = AI->getOperand(0);
        const Value *RHS = AI->getOperand(1);

        if (const auto *C = dyn_cast<ConstantInt>(LHS))
          if (C->getValue() == 0xffffffff)
            std::swap(LHS, RHS);

        if (const auto *C = dyn_cast<ConstantInt>(RHS))
          if (C->getValue() == 0xffffffff) {
            Addr.setExtendType(AArch64_AM::UXTW);
            unsigned Reg = getRegForValue(LHS);
            if (!Reg)
              return false;
            bool RegIsKill = hasTrivialKill(LHS);
            Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
                                             AArch64::sub_32);
            Addr.setOffsetReg(Reg);
            return true;
          }
      }

    unsigned Reg = getRegForValue(Src);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }
  case Instruction::Mul: {
    if (Addr.getOffsetReg())
      break;

    if (!isMulPowOf2(U))
      break;

    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    // Canonicalize power-of-2 value to the RHS.
    if (const auto *C = dyn_cast<ConstantInt>(LHS))
      if (C->getValue().isPowerOf2())
        std::swap(LHS, RHS);

    assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
    const auto *C = cast<ConstantInt>(RHS);
    unsigned Val = C->getValue().logBase2();
    if (Val < 1 || Val > 3)
      break;

    uint64_t NumBytes = 0;
    if (Ty && Ty->isSized()) {
      uint64_t NumBits = DL.getTypeSizeInBits(Ty);
      NumBytes = NumBits / 8;
      if (!isPowerOf2_64(NumBits))
        NumBytes = 0;
    }

    if (NumBytes != (1ULL << Val))
      break;

    Addr.setShift(Val);
    Addr.setExtendType(AArch64_AM::LSL);

    const Value *Src = LHS;
    if (const auto *I = dyn_cast<Instruction>(Src)) {
      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
        // Fold the zext or sext when it won't become a noop.
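        // Same folding as in the Shl case above: address through the extend's
        // 32-bit source with a UXTW/SXTW offset register.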
        if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
          if (!isIntExtFree(ZE) &&
              ZE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::UXTW);
            Src = ZE->getOperand(0);
          }
        } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
          if (!isIntExtFree(SE) &&
              SE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::SXTW);
            Src = SE->getOperand(0);
          }
        }
      }
    }

    unsigned Reg = getRegForValue(Src);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }
  case Instruction::And: {
    if (Addr.getOffsetReg())
      break;

    if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
      break;

    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    if (const auto *C = dyn_cast<ConstantInt>(LHS))
      if (C->getValue() == 0xffffffff)
        std::swap(LHS, RHS);

    if (const auto *C = dyn_cast<ConstantInt>(RHS))
      if (C->getValue() == 0xffffffff) {
        Addr.setShift(0);
        Addr.setExtendType(AArch64_AM::LSL);
        Addr.setExtendType(AArch64_AM::UXTW);

        unsigned Reg = getRegForValue(LHS);
        if (!Reg)
          return false;
        bool RegIsKill = hasTrivialKill(LHS);
        Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
                                         AArch64::sub_32);
        Addr.setOffsetReg(Reg);
        return true;
      }
    break;
  }
  case Instruction::SExt:
  case Instruction::ZExt: {
    if (!Addr.getReg() || Addr.getOffsetReg())
      break;

    const Value *Src = nullptr;
    // Fold the zext or sext when it won't become a noop.
    if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
      if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
        Addr.setExtendType(AArch64_AM::UXTW);
        Src = ZE->getOperand(0);
      }
    } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
      if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
        Addr.setExtendType(AArch64_AM::SXTW);
        Src = SE->getOperand(0);
      }
    }

    if (!Src)
      break;

    Addr.setShift(0);
    unsigned Reg = getRegForValue(Src);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }
  } // end switch

  if (Addr.isRegBase() && !Addr.getReg()) {
    unsigned Reg = getRegForValue(Obj);
    if (!Reg)
      return false;
    Addr.setReg(Reg);
    return true;
  }

  if (!Addr.getOffsetReg()) {
    unsigned Reg = getRegForValue(Obj);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }

  return false;
}

bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  bool InMBB = true;

  if (const auto *I = dyn_cast<Instruction>(V)) {
    Opcode = I->getOpcode();
    U = I;
    InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
  } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts if its operand is in the same BB.
    if (InMBB)
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  case Instruction::IntToPtr:
    // Look past no-op inttoptrs if its operand is in the same BB.
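    // An inttoptr is a no-op when its integer operand is already
    // pointer-sized.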
    if (InMBB &&
        TLI.getValueType(DL, U->getOperand(0)->getType()) ==
            TLI.getPointerTy(DL))
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  case Instruction::PtrToInt:
    // Look past no-op ptrtoints if its operand is in the same BB.
    if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  }

  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    Addr.setGlobalValue(GV);
    return true;
  }

  // If all else fails, try to materialize the value in a register.
  if (!Addr.getGlobalValue()) {
    Addr.setReg(getRegForValue(V));
    return Addr.getReg() != 0;
  }

  return false;
}

bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
  EVT evt = TLI.getValueType(DL, Ty, true);

  if (Subtarget->isTargetILP32() && Ty->isPointerTy())
    return false;

  // Only handle simple types.
  if (evt == MVT::Other || !evt.isSimple())
    return false;
  VT = evt.getSimpleVT();

  // This is a legal type, but it's not something we handle in fast-isel.
  if (VT == MVT::f128)
    return false;

  // Handle all other legal types, i.e. a register that will directly hold this
  // value.
  return TLI.isTypeLegal(VT);
}

/// Determine if the value type is supported by FastISel.
///
/// FastISel for AArch64 can handle more value types than are legal. This adds
/// simple value types such as i1, i8, and i16.
bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
  if (Ty->isVectorTy() && !IsVectorAllowed)
    return false;

  if (isTypeLegal(Ty, VT))
    return true;

  // If this is a type that can be sign- or zero-extended to a basic operation,
  // go ahead and accept it now.
  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
    return true;

  return false;
}

bool AArch64FastISel::isValueAvailable(const Value *V) const {
  if (!isa<Instruction>(V))
    return true;

  const auto *I = cast<Instruction>(V);
  return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
}

bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
  if (Subtarget->isTargetILP32())
    return false;

  unsigned ScaleFactor = getImplicitScaleFactor(VT);
  if (!ScaleFactor)
    return false;

  bool ImmediateOffsetNeedsLowering = false;
  bool RegisterOffsetNeedsLowering = false;
  int64_t Offset = Addr.getOffset();
  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
    ImmediateOffsetNeedsLowering = true;
  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
           !isUInt<12>(Offset / ScaleFactor))
    ImmediateOffsetNeedsLowering = true;

  // Cannot encode an offset register and an immediate offset in the same
  // instruction. Fold the immediate offset into the load/store instruction and
  // emit an additional add to take care of the offset register.
  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
    RegisterOffsetNeedsLowering = true;

  // Cannot encode zero register as base.
  if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
    RegisterOffsetNeedsLowering = true;

  // If this is a stack pointer and the offset needs to be simplified then put
  // the alloca address into a register, set the base type back to register and
  // continue.
  // This should almost never happen.
  if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
  {
    unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
            ResultReg)
        .addFrameIndex(Addr.getFI())
        .addImm(0)
        .addImm(0);
    Addr.setKind(Address::RegBase);
    Addr.setReg(ResultReg);
  }

  if (RegisterOffsetNeedsLowering) {
    unsigned ResultReg = 0;
    if (Addr.getReg()) {
      if (Addr.getExtendType() == AArch64_AM::SXTW ||
          Addr.getExtendType() == AArch64_AM::UXTW)
        ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
                                  /*TODO:IsKill=*/false, Addr.getExtendType(),
                                  Addr.getShift());
      else
        ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
                                  /*TODO:IsKill=*/false, AArch64_AM::LSL,
                                  Addr.getShift());
    } else {
      if (Addr.getExtendType() == AArch64_AM::UXTW)
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               /*Op0IsKill=*/false, Addr.getShift(),
                               /*IsZExt=*/true);
      else if (Addr.getExtendType() == AArch64_AM::SXTW)
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               /*Op0IsKill=*/false, Addr.getShift(),
                               /*IsZExt=*/false);
      else
        ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
                               /*Op0IsKill=*/false, Addr.getShift());
    }
    if (!ResultReg)
      return false;

    Addr.setReg(ResultReg);
    Addr.setOffsetReg(0);
    Addr.setShift(0);
    Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
  }

  // Since the offset is too large for the load/store instruction get the
  // reg+offset into a register.
  if (ImmediateOffsetNeedsLowering) {
    unsigned ResultReg;
    if (Addr.getReg())
      // Try to fold the immediate into the add instruction.
      ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
    else
      ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);

    if (!ResultReg)
      return false;
    Addr.setReg(ResultReg);
    Addr.setOffset(0);
  }
  return true;
}

void AArch64FastISel::addLoadStoreOperands(Address &Addr,
                                           const MachineInstrBuilder &MIB,
                                           MachineMemOperand::Flags Flags,
                                           unsigned ScaleFactor,
                                           MachineMemOperand *MMO) {
  int64_t Offset = Addr.getOffset() / ScaleFactor;
  // Frame base works a bit differently. Handle it separately.
  if (Addr.isFIBase()) {
    int FI = Addr.getFI();
    // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
    // and alignment should be based on the VT.
    MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
        MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
    // Now add the rest of the operands.
    MIB.addFrameIndex(FI).addImm(Offset);
  } else {
    assert(Addr.isRegBase() && "Unexpected address kind.");
    const MCInstrDesc &II = MIB->getDesc();
    unsigned Idx = (Flags & MachineMemOperand::MOStore) ?
        1 : 0;
    Addr.setReg(
        constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
    Addr.setOffsetReg(
        constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
    if (Addr.getOffsetReg()) {
      assert(Addr.getOffset() == 0 && "Unexpected offset");
      bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
                      Addr.getExtendType() == AArch64_AM::SXTX;
      MIB.addReg(Addr.getReg());
      MIB.addReg(Addr.getOffsetReg());
      MIB.addImm(IsSigned);
      MIB.addImm(Addr.getShift() != 0);
    } else
      MIB.addReg(Addr.getReg()).addImm(Offset);
  }

  if (MMO)
    MIB.addMemOperand(MMO);
}

unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                                     const Value *RHS, bool SetFlags,
                                     bool WantResult, bool IsZExt) {
  AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
  bool NeedExtend = false;
  switch (RetVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
    NeedExtend = true;
    break;
  case MVT::i8:
    NeedExtend = true;
    ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
    break;
  case MVT::i16:
    NeedExtend = true;
    ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
    break;
  case MVT::i32: // fall-through
  case MVT::i64:
    break;
  }
  MVT SrcVT = RetVT;
  RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);

  // Canonicalize immediates to the RHS first.
  if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
    std::swap(LHS, RHS);

  // Canonicalize mul by power of 2 to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (isMulPowOf2(LHS))
      std::swap(LHS, RHS);

  // Canonicalize shift immediate to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
        if (SI->getOpcode() == Instruction::Shl ||
            SI->getOpcode() == Instruction::LShr ||
            SI->getOpcode() == Instruction::AShr)
          std::swap(LHS, RHS);

  unsigned LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return 0;
  bool LHSIsKill = hasTrivialKill(LHS);

  if (NeedExtend)
    LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);

  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
    if (C->isNegative())
      ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
                                SetFlags, WantResult);
    else
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
                                WantResult);
  } else if (const auto *C = dyn_cast<Constant>(RHS))
    if (C->isNullValue())
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,
                                WantResult);

  if (ResultReg)
    return ResultReg;

  // Only extend the RHS within the instruction if there is a valid extend type.
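  // The extended-register (rx) form also takes a small left shift, so a shl
  // by 0-3 on the RHS can be folded together with the extend.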
  if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
      isValueAvailable(RHS)) {
    if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
        if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
          unsigned RHSReg = getRegForValue(SI->getOperand(0));
          if (!RHSReg)
            return 0;
          bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
          return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
                               RHSIsKill, ExtendType, C->getZExtValue(),
                               SetFlags, WantResult);
        }
    unsigned RHSReg = getRegForValue(RHS);
    if (!RHSReg)
      return 0;
    bool RHSIsKill = hasTrivialKill(RHS);
    return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
                         ExtendType, 0, SetFlags, WantResult);
  }

  // Check if the mul can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (isMulPowOf2(RHS)) {
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);

      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
        if (C->getValue().isPowerOf2())
          std::swap(MulLHS, MulRHS);

      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
      unsigned RHSReg = getRegForValue(MulLHS);
      if (!RHSReg)
        return 0;
      bool RHSIsKill = hasTrivialKill(MulLHS);
      ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
                                RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags,
                                WantResult);
      if (ResultReg)
        return ResultReg;
    }
  }

  // Check if the shift can be folded into the instruction.
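  // ADD/SUB (shifted register) accepts LSL, LSR, and ASR with an immediate
  // shift amount, so a single-use shift on the RHS can be merged here.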
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
        AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
        switch (SI->getOpcode()) {
        default: break;
        case Instruction::Shl:  ShiftType = AArch64_AM::LSL; break;
        case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
        case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
        }
        uint64_t ShiftVal = C->getZExtValue();
        if (ShiftType != AArch64_AM::InvalidShiftExtend) {
          unsigned RHSReg = getRegForValue(SI->getOperand(0));
          if (!RHSReg)
            return 0;
          bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
          ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
                                    RHSIsKill, ShiftType, ShiftVal, SetFlags,
                                    WantResult);
          if (ResultReg)
            return ResultReg;
        }
      }
    }
  }

  unsigned RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return 0;
  bool RHSIsKill = hasTrivialKill(RHS);

  if (NeedExtend)
    RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);

  return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
                       SetFlags, WantResult);
}

unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        bool LHSIsKill, unsigned RHSReg,
                                        bool RHSIsKill, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");

  if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
      RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
    return 0;

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrr,  AArch64::SUBXrr  },
      { AArch64::ADDWrr,  AArch64::ADDXrr  } },
    { { AArch64::SUBSWrr, AArch64::SUBSXrr },
      { AArch64::ADDSWrr, AArch64::ADDSXrr } }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ?
        AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
      .addReg(LHSReg, getKillRegState(LHSIsKill))
      .addReg(RHSReg, getKillRegState(RHSIsKill));
  return ResultReg;
}

unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        bool LHSIsKill, uint64_t Imm,
                                        bool SetFlags, bool WantResult) {
  assert(LHSReg && "Invalid register number.");

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  unsigned ShiftImm;
  if (isUInt<12>(Imm))
    ShiftImm = 0;
  else if ((Imm & 0xfff000) == Imm) {
    ShiftImm = 12;
    Imm >>= 12;
  } else
    return 0;

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWri,  AArch64::SUBXri  },
      { AArch64::ADDWri,  AArch64::ADDXri  } },
    { { AArch64::SUBSWri, AArch64::SUBSXri },
      { AArch64::ADDSWri, AArch64::ADDSXri } }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC;
  if (SetFlags)
    RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  else
    RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
      .addReg(LHSReg, getKillRegState(LHSIsKill))
      .addImm(Imm)
      .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
  return ResultReg;
}

unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        bool LHSIsKill, unsigned RHSReg,
                                        bool RHSIsKill,
                                        AArch64_AM::ShiftExtendType ShiftType,
                                        uint64_t ShiftImm, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");
  assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
         RHSReg != AArch64::SP && RHSReg != AArch64::WSP);

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  // Don't deal with undefined shifts.
  if (ShiftImm >= RetVT.getSizeInBits())
    return 0;

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrs,  AArch64::SUBXrs  },
      { AArch64::ADDWrs,  AArch64::ADDXrs  } },
    { { AArch64::SUBSWrs, AArch64::SUBSXrs },
      { AArch64::ADDSWrs, AArch64::ADDSXrs } }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ?
        AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
      .addReg(LHSReg, getKillRegState(LHSIsKill))
      .addReg(RHSReg, getKillRegState(RHSIsKill))
      .addImm(getShifterImm(ShiftType, ShiftImm));
  return ResultReg;
}

unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        bool LHSIsKill, unsigned RHSReg,
                                        bool RHSIsKill,
                                        AArch64_AM::ShiftExtendType ExtType,
                                        uint64_t ShiftImm, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");
  assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
         RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  if (ShiftImm >= 4)
    return 0;

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrx,  AArch64::SUBXrx  },
      { AArch64::ADDWrx,  AArch64::ADDXrx  } },
    { { AArch64::SUBSWrx, AArch64::SUBSXrx },
      { AArch64::ADDSWrx, AArch64::ADDSXrx } }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC = nullptr;
  if (SetFlags)
    RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  else
    RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
      .addReg(LHSReg, getKillRegState(LHSIsKill))
      .addReg(RHSReg, getKillRegState(RHSIsKill))
      .addImm(getArithExtendImm(ExtType, ShiftImm));
  return ResultReg;
}

bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
  Type *Ty = LHS->getType();
  EVT EVT = TLI.getValueType(DL, Ty, true);
  if (!EVT.isSimple())
    return false;
  MVT VT = EVT.getSimpleVT();

  switch (VT.SimpleTy) {
  default:
    return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    return emitICmp(VT, LHS, RHS, IsZExt);
  case MVT::f32:
  case MVT::f64:
    return emitFCmp(VT, LHS, RHS);
  }
}

bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
                               bool IsZExt) {
  return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
                 IsZExt) != 0;
}

bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
                                  uint64_t Imm) {
  return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
                       /*SetFlags=*/true, /*WantResult=*/false) != 0;
}

bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
  if (RetVT != MVT::f32 && RetVT != MVT::f64)
    return false;

  // Check to see if the 2nd operand is a constant that we can encode directly
  // in the compare.
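  // FCMP can compare directly against +0.0, but has no other floating-point
  // immediate form.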
  bool UseImm = false;
  if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
    if (CFP->isZero() && !CFP->isNegative())
      UseImm = true;

  unsigned LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return false;
  bool LHSIsKill = hasTrivialKill(LHS);

  if (UseImm) {
    unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
        .addReg(LHSReg, getKillRegState(LHSIsKill));
    return true;
  }

  unsigned RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return false;
  bool RHSIsKill = hasTrivialKill(RHS);

  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
      .addReg(LHSReg, getKillRegState(LHSIsKill))
      .addReg(RHSReg, getKillRegState(RHSIsKill));
  return true;
}

unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
                                  bool SetFlags, bool WantResult, bool IsZExt) {
  return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
                    IsZExt);
}

/// This method is a wrapper to simplify add emission.
///
/// First try to emit an add with an immediate operand using emitAddSub_ri. If
/// that fails, then try to materialize the immediate into a register and use
/// emitAddSub_rr instead.
unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
                                      int64_t Imm) {
  unsigned ResultReg;
  if (Imm < 0)
    ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
  else
    ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);

  if (ResultReg)
    return ResultReg;

  unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
  if (!CReg)
    return 0;

  ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);
  return ResultReg;
}

unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
                                  bool SetFlags, bool WantResult, bool IsZExt) {
  return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
                    IsZExt);
}

unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
                                      bool LHSIsKill, unsigned RHSReg,
                                      bool RHSIsKill, bool WantResult) {
  return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
                       RHSIsKill, /*SetFlags=*/true, WantResult);
}

unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
                                      bool LHSIsKill, unsigned RHSReg,
                                      bool RHSIsKill,
                                      AArch64_AM::ShiftExtendType ShiftType,
                                      uint64_t ShiftImm, bool WantResult) {
  return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
                       RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
                       WantResult);
}

unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
                                        const Value *LHS, const Value *RHS) {
  // Canonicalize immediates to the RHS first.
  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
    std::swap(LHS, RHS);

  // Canonicalize mul by power-of-2 to the RHS.
  if (LHS->hasOneUse() && isValueAvailable(LHS))
    if (isMulPowOf2(LHS))
      std::swap(LHS, RHS);

  // Canonicalize shift immediate to the RHS.
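  // The shifted-register forms of AND/ORR/EOR only shift the second source
  // operand, so move a foldable shl over to the RHS.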
  if (LHS->hasOneUse() && isValueAvailable(LHS))
    if (const auto *SI = dyn_cast<ShlOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
        std::swap(LHS, RHS);

  unsigned LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return 0;
  bool LHSIsKill = hasTrivialKill(LHS);

  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    uint64_t Imm = C->getZExtValue();
    ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
  }
  if (ResultReg)
    return ResultReg;

  // Check if the mul can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (isMulPowOf2(RHS)) {
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);

      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
        if (C->getValue().isPowerOf2())
          std::swap(MulLHS, MulRHS);

      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();

      unsigned RHSReg = getRegForValue(MulLHS);
      if (!RHSReg)
        return 0;
      bool RHSIsKill = hasTrivialKill(MulLHS);
      ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
                                   RHSIsKill, ShiftVal);
      if (ResultReg)
        return ResultReg;
    }
  }

  // Check if the shift can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (const auto *SI = dyn_cast<ShlOperator>(RHS))
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
        uint64_t ShiftVal = C->getZExtValue();
        unsigned RHSReg = getRegForValue(SI->getOperand(0));
        if (!RHSReg)
          return 0;
        bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
        ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
                                     RHSIsKill, ShiftVal);
        if (ResultReg)
          return ResultReg;
      }
  }

  unsigned RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return 0;
  bool RHSIsKill = hasTrivialKill(RHS);

  MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
  ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
    uint64_t Mask = (RetVT == MVT::i8) ?
0xff : 0xffff; 1686 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); 1687 } 1688 return ResultReg; 1689 } 1690 1691 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, 1692 unsigned LHSReg, bool LHSIsKill, 1693 uint64_t Imm) { 1694 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR), 1695 "ISD nodes are not consecutive!"); 1696 static const unsigned OpcTable[3][2] = { 1697 { AArch64::ANDWri, AArch64::ANDXri }, 1698 { AArch64::ORRWri, AArch64::ORRXri }, 1699 { AArch64::EORWri, AArch64::EORXri } 1700 }; 1701 const TargetRegisterClass *RC; 1702 unsigned Opc; 1703 unsigned RegSize; 1704 switch (RetVT.SimpleTy) { 1705 default: 1706 return 0; 1707 case MVT::i1: 1708 case MVT::i8: 1709 case MVT::i16: 1710 case MVT::i32: { 1711 unsigned Idx = ISDOpc - ISD::AND; 1712 Opc = OpcTable[Idx][0]; 1713 RC = &AArch64::GPR32spRegClass; 1714 RegSize = 32; 1715 break; 1716 } 1717 case MVT::i64: 1718 Opc = OpcTable[ISDOpc - ISD::AND][1]; 1719 RC = &AArch64::GPR64spRegClass; 1720 RegSize = 64; 1721 break; 1722 } 1723 1724 if (!AArch64_AM::isLogicalImmediate(Imm, RegSize)) 1725 return 0; 1726 1727 unsigned ResultReg = 1728 fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill, 1729 AArch64_AM::encodeLogicalImmediate(Imm, RegSize)); 1730 if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) { 1731 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; 1732 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); 1733 } 1734 return ResultReg; 1735 } 1736 1737 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, 1738 unsigned LHSReg, bool LHSIsKill, 1739 unsigned RHSReg, bool RHSIsKill, 1740 uint64_t ShiftImm) { 1741 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR), 1742 "ISD nodes are not consecutive!"); 1743 static const unsigned OpcTable[3][2] = { 1744 { AArch64::ANDWrs, AArch64::ANDXrs }, 1745 { AArch64::ORRWrs, AArch64::ORRXrs }, 1746 { AArch64::EORWrs, AArch64::EORXrs } 1747 }; 1748 1749 // Don't deal with undefined shifts. 1750 if (ShiftImm >= RetVT.getSizeInBits()) 1751 return 0; 1752 1753 const TargetRegisterClass *RC; 1754 unsigned Opc; 1755 switch (RetVT.SimpleTy) { 1756 default: 1757 return 0; 1758 case MVT::i1: 1759 case MVT::i8: 1760 case MVT::i16: 1761 case MVT::i32: 1762 Opc = OpcTable[ISDOpc - ISD::AND][0]; 1763 RC = &AArch64::GPR32RegClass; 1764 break; 1765 case MVT::i64: 1766 Opc = OpcTable[ISDOpc - ISD::AND][1]; 1767 RC = &AArch64::GPR64RegClass; 1768 break; 1769 } 1770 unsigned ResultReg = 1771 fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill, 1772 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm)); 1773 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) { 1774 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; 1775 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); 1776 } 1777 return ResultReg; 1778 } 1779 1780 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, 1781 uint64_t Imm) { 1782 return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm); 1783 } 1784 1785 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr, 1786 bool WantZExt, MachineMemOperand *MMO) { 1787 if (!TLI.allowsMisalignedMemoryAccesses(VT)) 1788 return 0; 1789 1790 // Simplify this down to something we can handle. 
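  // simplifyAddress is expected to rewrite Addr into a base register plus an
  // immediate or register offset that the addressing modes below can encode;
  // if that is not possible we return 0 and the caller falls back to
  // SelectionDAG.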
1791 if (!simplifyAddress(Addr, VT)) 1792 return 0; 1793 1794 unsigned ScaleFactor = getImplicitScaleFactor(VT); 1795 if (!ScaleFactor) 1796 llvm_unreachable("Unexpected value type."); 1797 1798 // Negative offsets require unscaled, 9-bit, signed immediate offsets. 1799 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. 1800 bool UseScaled = true; 1801 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) { 1802 UseScaled = false; 1803 ScaleFactor = 1; 1804 } 1805 1806 static const unsigned GPOpcTable[2][8][4] = { 1807 // Sign-extend. 1808 { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi, 1809 AArch64::LDURXi }, 1810 { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi, 1811 AArch64::LDURXi }, 1812 { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui, 1813 AArch64::LDRXui }, 1814 { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui, 1815 AArch64::LDRXui }, 1816 { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX, 1817 AArch64::LDRXroX }, 1818 { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX, 1819 AArch64::LDRXroX }, 1820 { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW, 1821 AArch64::LDRXroW }, 1822 { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW, 1823 AArch64::LDRXroW } 1824 }, 1825 // Zero-extend. 1826 { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, 1827 AArch64::LDURXi }, 1828 { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, 1829 AArch64::LDURXi }, 1830 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, 1831 AArch64::LDRXui }, 1832 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, 1833 AArch64::LDRXui }, 1834 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, 1835 AArch64::LDRXroX }, 1836 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, 1837 AArch64::LDRXroX }, 1838 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, 1839 AArch64::LDRXroW }, 1840 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, 1841 AArch64::LDRXroW } 1842 } 1843 }; 1844 1845 static const unsigned FPOpcTable[4][2] = { 1846 { AArch64::LDURSi, AArch64::LDURDi }, 1847 { AArch64::LDRSui, AArch64::LDRDui }, 1848 { AArch64::LDRSroX, AArch64::LDRDroX }, 1849 { AArch64::LDRSroW, AArch64::LDRDroW } 1850 }; 1851 1852 unsigned Opc; 1853 const TargetRegisterClass *RC; 1854 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() && 1855 Addr.getOffsetReg(); 1856 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0; 1857 if (Addr.getExtendType() == AArch64_AM::UXTW || 1858 Addr.getExtendType() == AArch64_AM::SXTW) 1859 Idx++; 1860 1861 bool IsRet64Bit = RetVT == MVT::i64; 1862 switch (VT.SimpleTy) { 1863 default: 1864 llvm_unreachable("Unexpected value type."); 1865 case MVT::i1: // Intentional fall-through. 1866 case MVT::i8: 1867 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0]; 1868 RC = (IsRet64Bit && !WantZExt) ? 1869 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1870 break; 1871 case MVT::i16: 1872 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1]; 1873 RC = (IsRet64Bit && !WantZExt) ? 1874 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1875 break; 1876 case MVT::i32: 1877 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2]; 1878 RC = (IsRet64Bit && !WantZExt) ? 
1879 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1880 break; 1881 case MVT::i64: 1882 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3]; 1883 RC = &AArch64::GPR64RegClass; 1884 break; 1885 case MVT::f32: 1886 Opc = FPOpcTable[Idx][0]; 1887 RC = &AArch64::FPR32RegClass; 1888 break; 1889 case MVT::f64: 1890 Opc = FPOpcTable[Idx][1]; 1891 RC = &AArch64::FPR64RegClass; 1892 break; 1893 } 1894 1895 // Create the base instruction, then add the operands. 1896 unsigned ResultReg = createResultReg(RC); 1897 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 1898 TII.get(Opc), ResultReg); 1899 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO); 1900 1901 // Loading an i1 requires special handling. 1902 if (VT == MVT::i1) { 1903 unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1); 1904 assert(ANDReg && "Unexpected AND instruction emission failure."); 1905 ResultReg = ANDReg; 1906 } 1907 1908 // For zero-extending loads to 64bit we emit a 32bit load and then convert 1909 // the 32bit reg to a 64bit reg. 1910 if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) { 1911 unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass); 1912 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 1913 TII.get(AArch64::SUBREG_TO_REG), Reg64) 1914 .addImm(0) 1915 .addReg(ResultReg, getKillRegState(true)) 1916 .addImm(AArch64::sub_32); 1917 ResultReg = Reg64; 1918 } 1919 return ResultReg; 1920 } 1921 1922 bool AArch64FastISel::selectAddSub(const Instruction *I) { 1923 MVT VT; 1924 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 1925 return false; 1926 1927 if (VT.isVector()) 1928 return selectOperator(I, I->getOpcode()); 1929 1930 unsigned ResultReg; 1931 switch (I->getOpcode()) { 1932 default: 1933 llvm_unreachable("Unexpected instruction."); 1934 case Instruction::Add: 1935 ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1)); 1936 break; 1937 case Instruction::Sub: 1938 ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1)); 1939 break; 1940 } 1941 if (!ResultReg) 1942 return false; 1943 1944 updateValueMap(I, ResultReg); 1945 return true; 1946 } 1947 1948 bool AArch64FastISel::selectLogicalOp(const Instruction *I) { 1949 MVT VT; 1950 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 1951 return false; 1952 1953 if (VT.isVector()) 1954 return selectOperator(I, I->getOpcode()); 1955 1956 unsigned ResultReg; 1957 switch (I->getOpcode()) { 1958 default: 1959 llvm_unreachable("Unexpected instruction."); 1960 case Instruction::And: 1961 ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1)); 1962 break; 1963 case Instruction::Or: 1964 ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1)); 1965 break; 1966 case Instruction::Xor: 1967 ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1)); 1968 break; 1969 } 1970 if (!ResultReg) 1971 return false; 1972 1973 updateValueMap(I, ResultReg); 1974 return true; 1975 } 1976 1977 bool AArch64FastISel::selectLoad(const Instruction *I) { 1978 MVT VT; 1979 // Verify we have a legal type before going any further. Currently, we handle 1980 // simple types that will directly fit in a register (i32/f32/i64/f64) or 1981 // those that can be sign or zero-extended to a basic operation (i1/i8/i16). 
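  // Vector loads are allowed here as well (IsVectorAllowed below); atomic
  // loads are rejected and left to SelectionDAG, which knows how to honor
  // their ordering constraints.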
1982   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1983       cast<LoadInst>(I)->isAtomic())
1984     return false;
1985
1986   const Value *SV = I->getOperand(0);
1987   if (TLI.supportSwiftError()) {
1988     // Swifterror values can come from either a function parameter with
1989     // swifterror attribute or an alloca with swifterror attribute.
1990     if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1991       if (Arg->hasSwiftErrorAttr())
1992         return false;
1993     }
1994
1995     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1996       if (Alloca->isSwiftError())
1997         return false;
1998     }
1999   }
2000
2001   // See if we can handle this address.
2002   Address Addr;
2003   if (!computeAddress(I->getOperand(0), Addr, I->getType()))
2004     return false;
2005
2006   // Fold the following sign-/zero-extend into the load instruction.
2007   bool WantZExt = true;
2008   MVT RetVT = VT;
2009   const Value *IntExtVal = nullptr;
2010   if (I->hasOneUse()) {
2011     if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
2012       if (isTypeSupported(ZE->getType(), RetVT))
2013         IntExtVal = ZE;
2014       else
2015         RetVT = VT;
2016     } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
2017       if (isTypeSupported(SE->getType(), RetVT))
2018         IntExtVal = SE;
2019       else
2020         RetVT = VT;
2021       WantZExt = false;
2022     }
2023   }
2024
2025   unsigned ResultReg =
2026       emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
2027   if (!ResultReg)
2028     return false;
2029
2030   // There are a few different cases we have to handle, because the load or the
2031   // sign-/zero-extend might not be selected by FastISel if we fall back to
2032   // SelectionDAG. There is also an ordering issue when both instructions are in
2033   // different basic blocks.
2034   // 1.) The load instruction is selected by FastISel, but the integer extend is
2035   //     not. This usually happens when the integer extend is in a different
2036   //     basic block and SelectionDAG took over for that basic block.
2037   // 2.) The load instruction is selected before the integer extend. This only
2038   //     happens when the integer extend is in a different basic block.
2039   // 3.) The load instruction is selected by SelectionDAG and the integer extend
2040   //     by FastISel. This happens if there are instructions between the load
2041   //     and the integer extend that couldn't be selected by FastISel.
2042   if (IntExtVal) {
2043     // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2044     // could select it. Emit a copy to subreg if necessary. FastISel will remove
2045     // it when it selects the integer extend.
2046     unsigned Reg = lookUpRegForValue(IntExtVal);
2047     auto *MI = MRI.getUniqueVRegDef(Reg);
2048     if (!MI) {
2049       if (RetVT == MVT::i64 && VT <= MVT::i32) {
2050         if (WantZExt) {
2051           // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2052           MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
2053           ResultReg = std::prev(I)->getOperand(0).getReg();
2054           removeDeadCode(I, std::next(I));
2055         } else
2056           ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2057                                                  /*IsKill=*/true,
2058                                                  AArch64::sub_32);
2059       }
2060       updateValueMap(I, ResultReg);
2061       return true;
2062     }
2063
2064     // The integer extend has already been emitted - delete all the instructions
2065     // that have been emitted by the integer extend lowering code and use the
2066     // result from the load instruction directly.
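    // Walk the chain of dead instructions: remove the current one, then
    // follow its first register use operand to that register's unique
    // definition and repeat until no defining instruction remains.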
2067 while (MI) { 2068 Reg = 0; 2069 for (auto &Opnd : MI->uses()) { 2070 if (Opnd.isReg()) { 2071 Reg = Opnd.getReg(); 2072 break; 2073 } 2074 } 2075 MachineBasicBlock::iterator I(MI); 2076 removeDeadCode(I, std::next(I)); 2077 MI = nullptr; 2078 if (Reg) 2079 MI = MRI.getUniqueVRegDef(Reg); 2080 } 2081 updateValueMap(IntExtVal, ResultReg); 2082 return true; 2083 } 2084 2085 updateValueMap(I, ResultReg); 2086 return true; 2087 } 2088 2089 bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg, 2090 unsigned AddrReg, 2091 MachineMemOperand *MMO) { 2092 unsigned Opc; 2093 switch (VT.SimpleTy) { 2094 default: return false; 2095 case MVT::i8: Opc = AArch64::STLRB; break; 2096 case MVT::i16: Opc = AArch64::STLRH; break; 2097 case MVT::i32: Opc = AArch64::STLRW; break; 2098 case MVT::i64: Opc = AArch64::STLRX; break; 2099 } 2100 2101 const MCInstrDesc &II = TII.get(Opc); 2102 SrcReg = constrainOperandRegClass(II, SrcReg, 0); 2103 AddrReg = constrainOperandRegClass(II, AddrReg, 1); 2104 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) 2105 .addReg(SrcReg) 2106 .addReg(AddrReg) 2107 .addMemOperand(MMO); 2108 return true; 2109 } 2110 2111 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr, 2112 MachineMemOperand *MMO) { 2113 if (!TLI.allowsMisalignedMemoryAccesses(VT)) 2114 return false; 2115 2116 // Simplify this down to something we can handle. 2117 if (!simplifyAddress(Addr, VT)) 2118 return false; 2119 2120 unsigned ScaleFactor = getImplicitScaleFactor(VT); 2121 if (!ScaleFactor) 2122 llvm_unreachable("Unexpected value type."); 2123 2124 // Negative offsets require unscaled, 9-bit, signed immediate offsets. 2125 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. 2126 bool UseScaled = true; 2127 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) { 2128 UseScaled = false; 2129 ScaleFactor = 1; 2130 } 2131 2132 static const unsigned OpcTable[4][6] = { 2133 { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi, 2134 AArch64::STURSi, AArch64::STURDi }, 2135 { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui, 2136 AArch64::STRSui, AArch64::STRDui }, 2137 { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX, 2138 AArch64::STRSroX, AArch64::STRDroX }, 2139 { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW, 2140 AArch64::STRSroW, AArch64::STRDroW } 2141 }; 2142 2143 unsigned Opc; 2144 bool VTIsi1 = false; 2145 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() && 2146 Addr.getOffsetReg(); 2147 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0; 2148 if (Addr.getExtendType() == AArch64_AM::UXTW || 2149 Addr.getExtendType() == AArch64_AM::SXTW) 2150 Idx++; 2151 2152 switch (VT.SimpleTy) { 2153 default: llvm_unreachable("Unexpected value type."); 2154 case MVT::i1: VTIsi1 = true; LLVM_FALLTHROUGH; 2155 case MVT::i8: Opc = OpcTable[Idx][0]; break; 2156 case MVT::i16: Opc = OpcTable[Idx][1]; break; 2157 case MVT::i32: Opc = OpcTable[Idx][2]; break; 2158 case MVT::i64: Opc = OpcTable[Idx][3]; break; 2159 case MVT::f32: Opc = OpcTable[Idx][4]; break; 2160 case MVT::f64: Opc = OpcTable[Idx][5]; break; 2161 } 2162 2163 // Storing an i1 requires special handling. 
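  // An i1 value lives in a 32-bit register whose upper 31 bits are undefined,
  // so mask it down to bit 0 with an AND before storing it as a byte.
  // WZR is already zero, so it can be stored as-is.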
2164 if (VTIsi1 && SrcReg != AArch64::WZR) { 2165 unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1); 2166 assert(ANDReg && "Unexpected AND instruction emission failure."); 2167 SrcReg = ANDReg; 2168 } 2169 // Create the base instruction, then add the operands. 2170 const MCInstrDesc &II = TII.get(Opc); 2171 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); 2172 MachineInstrBuilder MIB = 2173 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg); 2174 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO); 2175 2176 return true; 2177 } 2178 2179 bool AArch64FastISel::selectStore(const Instruction *I) { 2180 MVT VT; 2181 const Value *Op0 = I->getOperand(0); 2182 // Verify we have a legal type before going any further. Currently, we handle 2183 // simple types that will directly fit in a register (i32/f32/i64/f64) or 2184 // those that can be sign or zero-extended to a basic operation (i1/i8/i16). 2185 if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true)) 2186 return false; 2187 2188 const Value *PtrV = I->getOperand(1); 2189 if (TLI.supportSwiftError()) { 2190 // Swifterror values can come from either a function parameter with 2191 // swifterror attribute or an alloca with swifterror attribute. 2192 if (const Argument *Arg = dyn_cast<Argument>(PtrV)) { 2193 if (Arg->hasSwiftErrorAttr()) 2194 return false; 2195 } 2196 2197 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) { 2198 if (Alloca->isSwiftError()) 2199 return false; 2200 } 2201 } 2202 2203 // Get the value to be stored into a register. Use the zero register directly 2204 // when possible to avoid an unnecessary copy and a wasted register. 2205 unsigned SrcReg = 0; 2206 if (const auto *CI = dyn_cast<ConstantInt>(Op0)) { 2207 if (CI->isZero()) 2208 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 2209 } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) { 2210 if (CF->isZero() && !CF->isNegative()) { 2211 VT = MVT::getIntegerVT(VT.getSizeInBits()); 2212 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 2213 } 2214 } 2215 2216 if (!SrcReg) 2217 SrcReg = getRegForValue(Op0); 2218 2219 if (!SrcReg) 2220 return false; 2221 2222 auto *SI = cast<StoreInst>(I); 2223 2224 // Try to emit a STLR for seq_cst/release. 2225 if (SI->isAtomic()) { 2226 AtomicOrdering Ord = SI->getOrdering(); 2227 // The non-atomic instructions are sufficient for relaxed stores. 2228 if (isReleaseOrStronger(Ord)) { 2229 // The STLR addressing mode only supports a base reg; pass that directly. 2230 unsigned AddrReg = getRegForValue(PtrV); 2231 return emitStoreRelease(VT, SrcReg, AddrReg, 2232 createMachineMemOperandFor(I)); 2233 } 2234 } 2235 2236 // See if we can handle this address. 2237 Address Addr; 2238 if (!computeAddress(PtrV, Addr, Op0->getType())) 2239 return false; 2240 2241 if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I))) 2242 return false; 2243 return true; 2244 } 2245 2246 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) { 2247 switch (Pred) { 2248 case CmpInst::FCMP_ONE: 2249 case CmpInst::FCMP_UEQ: 2250 default: 2251 // AL is our "false" for now. The other two need more compares. 
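    // FCMP_ONE and FCMP_UEQ each decompose into two conditions
    // (ONE = MI || GT, UEQ = EQ || VS); the callers handle them by emitting
    // an extra conditional branch or CSINC, see selectBranch and selectCmp.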
2252 return AArch64CC::AL; 2253 case CmpInst::ICMP_EQ: 2254 case CmpInst::FCMP_OEQ: 2255 return AArch64CC::EQ; 2256 case CmpInst::ICMP_SGT: 2257 case CmpInst::FCMP_OGT: 2258 return AArch64CC::GT; 2259 case CmpInst::ICMP_SGE: 2260 case CmpInst::FCMP_OGE: 2261 return AArch64CC::GE; 2262 case CmpInst::ICMP_UGT: 2263 case CmpInst::FCMP_UGT: 2264 return AArch64CC::HI; 2265 case CmpInst::FCMP_OLT: 2266 return AArch64CC::MI; 2267 case CmpInst::ICMP_ULE: 2268 case CmpInst::FCMP_OLE: 2269 return AArch64CC::LS; 2270 case CmpInst::FCMP_ORD: 2271 return AArch64CC::VC; 2272 case CmpInst::FCMP_UNO: 2273 return AArch64CC::VS; 2274 case CmpInst::FCMP_UGE: 2275 return AArch64CC::PL; 2276 case CmpInst::ICMP_SLT: 2277 case CmpInst::FCMP_ULT: 2278 return AArch64CC::LT; 2279 case CmpInst::ICMP_SLE: 2280 case CmpInst::FCMP_ULE: 2281 return AArch64CC::LE; 2282 case CmpInst::FCMP_UNE: 2283 case CmpInst::ICMP_NE: 2284 return AArch64CC::NE; 2285 case CmpInst::ICMP_UGE: 2286 return AArch64CC::HS; 2287 case CmpInst::ICMP_ULT: 2288 return AArch64CC::LO; 2289 } 2290 } 2291 2292 /// Try to emit a combined compare-and-branch instruction. 2293 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) { 2294 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions 2295 // will not be produced, as they are conditional branch instructions that do 2296 // not set flags. 2297 if (FuncInfo.MF->getFunction().hasFnAttribute( 2298 Attribute::SpeculativeLoadHardening)) 2299 return false; 2300 2301 assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction"); 2302 const CmpInst *CI = cast<CmpInst>(BI->getCondition()); 2303 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2304 2305 const Value *LHS = CI->getOperand(0); 2306 const Value *RHS = CI->getOperand(1); 2307 2308 MVT VT; 2309 if (!isTypeSupported(LHS->getType(), VT)) 2310 return false; 2311 2312 unsigned BW = VT.getSizeInBits(); 2313 if (BW > 64) 2314 return false; 2315 2316 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; 2317 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; 2318 2319 // Try to take advantage of fallthrough opportunities. 
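  // If the true block immediately follows in layout, branch on the inverted
  // predicate to the false block instead and let execution fall through to
  // the true block.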
2320 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2321 std::swap(TBB, FBB); 2322 Predicate = CmpInst::getInversePredicate(Predicate); 2323 } 2324 2325 int TestBit = -1; 2326 bool IsCmpNE; 2327 switch (Predicate) { 2328 default: 2329 return false; 2330 case CmpInst::ICMP_EQ: 2331 case CmpInst::ICMP_NE: 2332 if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue()) 2333 std::swap(LHS, RHS); 2334 2335 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue()) 2336 return false; 2337 2338 if (const auto *AI = dyn_cast<BinaryOperator>(LHS)) 2339 if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) { 2340 const Value *AndLHS = AI->getOperand(0); 2341 const Value *AndRHS = AI->getOperand(1); 2342 2343 if (const auto *C = dyn_cast<ConstantInt>(AndLHS)) 2344 if (C->getValue().isPowerOf2()) 2345 std::swap(AndLHS, AndRHS); 2346 2347 if (const auto *C = dyn_cast<ConstantInt>(AndRHS)) 2348 if (C->getValue().isPowerOf2()) { 2349 TestBit = C->getValue().logBase2(); 2350 LHS = AndLHS; 2351 } 2352 } 2353 2354 if (VT == MVT::i1) 2355 TestBit = 0; 2356 2357 IsCmpNE = Predicate == CmpInst::ICMP_NE; 2358 break; 2359 case CmpInst::ICMP_SLT: 2360 case CmpInst::ICMP_SGE: 2361 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue()) 2362 return false; 2363 2364 TestBit = BW - 1; 2365 IsCmpNE = Predicate == CmpInst::ICMP_SLT; 2366 break; 2367 case CmpInst::ICMP_SGT: 2368 case CmpInst::ICMP_SLE: 2369 if (!isa<ConstantInt>(RHS)) 2370 return false; 2371 2372 if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true)) 2373 return false; 2374 2375 TestBit = BW - 1; 2376 IsCmpNE = Predicate == CmpInst::ICMP_SLE; 2377 break; 2378 } // end switch 2379 2380 static const unsigned OpcTable[2][2][2] = { 2381 { {AArch64::CBZW, AArch64::CBZX }, 2382 {AArch64::CBNZW, AArch64::CBNZX} }, 2383 { {AArch64::TBZW, AArch64::TBZX }, 2384 {AArch64::TBNZW, AArch64::TBNZX} } 2385 }; 2386 2387 bool IsBitTest = TestBit != -1; 2388 bool Is64Bit = BW == 64; 2389 if (TestBit < 32 && TestBit >= 0) 2390 Is64Bit = false; 2391 2392 unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit]; 2393 const MCInstrDesc &II = TII.get(Opc); 2394 2395 unsigned SrcReg = getRegForValue(LHS); 2396 if (!SrcReg) 2397 return false; 2398 bool SrcIsKill = hasTrivialKill(LHS); 2399 2400 if (BW == 64 && !Is64Bit) 2401 SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill, 2402 AArch64::sub_32); 2403 2404 if ((BW < 32) && !IsBitTest) 2405 SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true); 2406 2407 // Emit the combined compare and branch instruction. 2408 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); 2409 MachineInstrBuilder MIB = 2410 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) 2411 .addReg(SrcReg, getKillRegState(SrcIsKill)); 2412 if (IsBitTest) 2413 MIB.addImm(TestBit); 2414 MIB.addMBB(TBB); 2415 2416 finishCondBranch(BI->getParent(), TBB, FBB); 2417 return true; 2418 } 2419 2420 bool AArch64FastISel::selectBranch(const Instruction *I) { 2421 const BranchInst *BI = cast<BranchInst>(I); 2422 if (BI->isUnconditional()) { 2423 MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)]; 2424 fastEmitBranch(MSucc, BI->getDebugLoc()); 2425 return true; 2426 } 2427 2428 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; 2429 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; 2430 2431 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { 2432 if (CI->hasOneUse() && isValueAvailable(CI)) { 2433 // Try to optimize or fold the cmp. 
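      // If the predicate folds to FCMP_FALSE or FCMP_TRUE, the conditional
      // branch degenerates into an unconditional branch below.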
2434 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2435 switch (Predicate) { 2436 default: 2437 break; 2438 case CmpInst::FCMP_FALSE: 2439 fastEmitBranch(FBB, DbgLoc); 2440 return true; 2441 case CmpInst::FCMP_TRUE: 2442 fastEmitBranch(TBB, DbgLoc); 2443 return true; 2444 } 2445 2446 // Try to emit a combined compare-and-branch first. 2447 if (emitCompareAndBranch(BI)) 2448 return true; 2449 2450 // Try to take advantage of fallthrough opportunities. 2451 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2452 std::swap(TBB, FBB); 2453 Predicate = CmpInst::getInversePredicate(Predicate); 2454 } 2455 2456 // Emit the cmp. 2457 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) 2458 return false; 2459 2460 // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch 2461 // instruction. 2462 AArch64CC::CondCode CC = getCompareCC(Predicate); 2463 AArch64CC::CondCode ExtraCC = AArch64CC::AL; 2464 switch (Predicate) { 2465 default: 2466 break; 2467 case CmpInst::FCMP_UEQ: 2468 ExtraCC = AArch64CC::EQ; 2469 CC = AArch64CC::VS; 2470 break; 2471 case CmpInst::FCMP_ONE: 2472 ExtraCC = AArch64CC::MI; 2473 CC = AArch64CC::GT; 2474 break; 2475 } 2476 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2477 2478 // Emit the extra branch for FCMP_UEQ and FCMP_ONE. 2479 if (ExtraCC != AArch64CC::AL) { 2480 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) 2481 .addImm(ExtraCC) 2482 .addMBB(TBB); 2483 } 2484 2485 // Emit the branch. 2486 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) 2487 .addImm(CC) 2488 .addMBB(TBB); 2489 2490 finishCondBranch(BI->getParent(), TBB, FBB); 2491 return true; 2492 } 2493 } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) { 2494 uint64_t Imm = CI->getZExtValue(); 2495 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; 2496 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B)) 2497 .addMBB(Target); 2498 2499 // Obtain the branch probability and add the target to the successor list. 2500 if (FuncInfo.BPI) { 2501 auto BranchProbability = FuncInfo.BPI->getEdgeProbability( 2502 BI->getParent(), Target->getBasicBlock()); 2503 FuncInfo.MBB->addSuccessor(Target, BranchProbability); 2504 } else 2505 FuncInfo.MBB->addSuccessorWithoutProb(Target); 2506 return true; 2507 } else { 2508 AArch64CC::CondCode CC = AArch64CC::NE; 2509 if (foldXALUIntrinsic(CC, I, BI->getCondition())) { 2510 // Fake request the condition, otherwise the intrinsic might be completely 2511 // optimized away. 2512 unsigned CondReg = getRegForValue(BI->getCondition()); 2513 if (!CondReg) 2514 return false; 2515 2516 // Emit the branch. 2517 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) 2518 .addImm(CC) 2519 .addMBB(TBB); 2520 2521 finishCondBranch(BI->getParent(), TBB, FBB); 2522 return true; 2523 } 2524 } 2525 2526 unsigned CondReg = getRegForValue(BI->getCondition()); 2527 if (CondReg == 0) 2528 return false; 2529 bool CondRegIsKill = hasTrivialKill(BI->getCondition()); 2530 2531 // i1 conditions come as i32 values, test the lowest bit with tb(n)z. 
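  // e.g. "tbnz w0, #0, .LBB" branches to the true block when bit 0 is set.
  // If the true block is the layout successor, test with tbz and swap the
  // targets so the fallthrough path is used instead.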
2532 unsigned Opcode = AArch64::TBNZW; 2533 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2534 std::swap(TBB, FBB); 2535 Opcode = AArch64::TBZW; 2536 } 2537 2538 const MCInstrDesc &II = TII.get(Opcode); 2539 unsigned ConstrainedCondReg 2540 = constrainOperandRegClass(II, CondReg, II.getNumDefs()); 2541 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) 2542 .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill)) 2543 .addImm(0) 2544 .addMBB(TBB); 2545 2546 finishCondBranch(BI->getParent(), TBB, FBB); 2547 return true; 2548 } 2549 2550 bool AArch64FastISel::selectIndirectBr(const Instruction *I) { 2551 const IndirectBrInst *BI = cast<IndirectBrInst>(I); 2552 unsigned AddrReg = getRegForValue(BI->getOperand(0)); 2553 if (AddrReg == 0) 2554 return false; 2555 2556 // Emit the indirect branch. 2557 const MCInstrDesc &II = TII.get(AArch64::BR); 2558 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs()); 2559 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg); 2560 2561 // Make sure the CFG is up-to-date. 2562 for (auto *Succ : BI->successors()) 2563 FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]); 2564 2565 return true; 2566 } 2567 2568 bool AArch64FastISel::selectCmp(const Instruction *I) { 2569 const CmpInst *CI = cast<CmpInst>(I); 2570 2571 // Vectors of i1 are weird: bail out. 2572 if (CI->getType()->isVectorTy()) 2573 return false; 2574 2575 // Try to optimize or fold the cmp. 2576 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2577 unsigned ResultReg = 0; 2578 switch (Predicate) { 2579 default: 2580 break; 2581 case CmpInst::FCMP_FALSE: 2582 ResultReg = createResultReg(&AArch64::GPR32RegClass); 2583 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2584 TII.get(TargetOpcode::COPY), ResultReg) 2585 .addReg(AArch64::WZR, getKillRegState(true)); 2586 break; 2587 case CmpInst::FCMP_TRUE: 2588 ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1); 2589 break; 2590 } 2591 2592 if (ResultReg) { 2593 updateValueMap(I, ResultReg); 2594 return true; 2595 } 2596 2597 // Emit the cmp. 2598 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) 2599 return false; 2600 2601 ResultReg = createResultReg(&AArch64::GPR32RegClass); 2602 2603 // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These 2604 // condition codes are inverted, because they are used by CSINC. 2605 static unsigned CondCodeTable[2][2] = { 2606 { AArch64CC::NE, AArch64CC::VC }, 2607 { AArch64CC::PL, AArch64CC::LE } 2608 }; 2609 unsigned *CondCodes = nullptr; 2610 switch (Predicate) { 2611 default: 2612 break; 2613 case CmpInst::FCMP_UEQ: 2614 CondCodes = &CondCodeTable[0][0]; 2615 break; 2616 case CmpInst::FCMP_ONE: 2617 CondCodes = &CondCodeTable[1][0]; 2618 break; 2619 } 2620 2621 if (CondCodes) { 2622 unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass); 2623 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr), 2624 TmpReg1) 2625 .addReg(AArch64::WZR, getKillRegState(true)) 2626 .addReg(AArch64::WZR, getKillRegState(true)) 2627 .addImm(CondCodes[0]); 2628 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr), 2629 ResultReg) 2630 .addReg(TmpReg1, getKillRegState(true)) 2631 .addReg(AArch64::WZR, getKillRegState(true)) 2632 .addImm(CondCodes[1]); 2633 2634 updateValueMap(I, ResultReg); 2635 return true; 2636 } 2637 2638 // Now set a register based on the comparison. 
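  // Materialize the boolean with CSINC from the zero register using the
  // inverted condition:
  //   csinc w0, wzr, wzr, <inv cc>   ; w0 = cc ? 1 : 0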
2639 AArch64CC::CondCode CC = getCompareCC(Predicate); 2640 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2641 AArch64CC::CondCode invertedCC = getInvertedCondCode(CC); 2642 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr), 2643 ResultReg) 2644 .addReg(AArch64::WZR, getKillRegState(true)) 2645 .addReg(AArch64::WZR, getKillRegState(true)) 2646 .addImm(invertedCC); 2647 2648 updateValueMap(I, ResultReg); 2649 return true; 2650 } 2651 2652 /// Optimize selects of i1 if one of the operands has a 'true' or 'false' 2653 /// value. 2654 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) { 2655 if (!SI->getType()->isIntegerTy(1)) 2656 return false; 2657 2658 const Value *Src1Val, *Src2Val; 2659 unsigned Opc = 0; 2660 bool NeedExtraOp = false; 2661 if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) { 2662 if (CI->isOne()) { 2663 Src1Val = SI->getCondition(); 2664 Src2Val = SI->getFalseValue(); 2665 Opc = AArch64::ORRWrr; 2666 } else { 2667 assert(CI->isZero()); 2668 Src1Val = SI->getFalseValue(); 2669 Src2Val = SI->getCondition(); 2670 Opc = AArch64::BICWrr; 2671 } 2672 } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) { 2673 if (CI->isOne()) { 2674 Src1Val = SI->getCondition(); 2675 Src2Val = SI->getTrueValue(); 2676 Opc = AArch64::ORRWrr; 2677 NeedExtraOp = true; 2678 } else { 2679 assert(CI->isZero()); 2680 Src1Val = SI->getCondition(); 2681 Src2Val = SI->getTrueValue(); 2682 Opc = AArch64::ANDWrr; 2683 } 2684 } 2685 2686 if (!Opc) 2687 return false; 2688 2689 unsigned Src1Reg = getRegForValue(Src1Val); 2690 if (!Src1Reg) 2691 return false; 2692 bool Src1IsKill = hasTrivialKill(Src1Val); 2693 2694 unsigned Src2Reg = getRegForValue(Src2Val); 2695 if (!Src2Reg) 2696 return false; 2697 bool Src2IsKill = hasTrivialKill(Src2Val); 2698 2699 if (NeedExtraOp) { 2700 Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1); 2701 Src1IsKill = true; 2702 } 2703 unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg, 2704 Src1IsKill, Src2Reg, Src2IsKill); 2705 updateValueMap(SI, ResultReg); 2706 return true; 2707 } 2708 2709 bool AArch64FastISel::selectSelect(const Instruction *I) { 2710 assert(isa<SelectInst>(I) && "Expected a select instruction."); 2711 MVT VT; 2712 if (!isTypeSupported(I->getType(), VT)) 2713 return false; 2714 2715 unsigned Opc; 2716 const TargetRegisterClass *RC; 2717 switch (VT.SimpleTy) { 2718 default: 2719 return false; 2720 case MVT::i1: 2721 case MVT::i8: 2722 case MVT::i16: 2723 case MVT::i32: 2724 Opc = AArch64::CSELWr; 2725 RC = &AArch64::GPR32RegClass; 2726 break; 2727 case MVT::i64: 2728 Opc = AArch64::CSELXr; 2729 RC = &AArch64::GPR64RegClass; 2730 break; 2731 case MVT::f32: 2732 Opc = AArch64::FCSELSrrr; 2733 RC = &AArch64::FPR32RegClass; 2734 break; 2735 case MVT::f64: 2736 Opc = AArch64::FCSELDrrr; 2737 RC = &AArch64::FPR64RegClass; 2738 break; 2739 } 2740 2741 const SelectInst *SI = cast<SelectInst>(I); 2742 const Value *Cond = SI->getCondition(); 2743 AArch64CC::CondCode CC = AArch64CC::NE; 2744 AArch64CC::CondCode ExtraCC = AArch64CC::AL; 2745 2746 if (optimizeSelect(SI)) 2747 return true; 2748 2749 // Try to pickup the flags, so we don't have to emit another compare. 2750 if (foldXALUIntrinsic(CC, I, Cond)) { 2751 // Fake request the condition to force emission of the XALU intrinsic. 
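    // Requesting a register keeps the overflow intrinsic alive so its
    // flag-setting instruction is actually emitted; the CSEL/FCSEL below then
    // consumes NZCV directly without another compare.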
2752 unsigned CondReg = getRegForValue(Cond); 2753 if (!CondReg) 2754 return false; 2755 } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() && 2756 isValueAvailable(Cond)) { 2757 const auto *Cmp = cast<CmpInst>(Cond); 2758 // Try to optimize or fold the cmp. 2759 CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp); 2760 const Value *FoldSelect = nullptr; 2761 switch (Predicate) { 2762 default: 2763 break; 2764 case CmpInst::FCMP_FALSE: 2765 FoldSelect = SI->getFalseValue(); 2766 break; 2767 case CmpInst::FCMP_TRUE: 2768 FoldSelect = SI->getTrueValue(); 2769 break; 2770 } 2771 2772 if (FoldSelect) { 2773 unsigned SrcReg = getRegForValue(FoldSelect); 2774 if (!SrcReg) 2775 return false; 2776 unsigned UseReg = lookUpRegForValue(SI); 2777 if (UseReg) 2778 MRI.clearKillFlags(UseReg); 2779 2780 updateValueMap(I, SrcReg); 2781 return true; 2782 } 2783 2784 // Emit the cmp. 2785 if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned())) 2786 return false; 2787 2788 // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction. 2789 CC = getCompareCC(Predicate); 2790 switch (Predicate) { 2791 default: 2792 break; 2793 case CmpInst::FCMP_UEQ: 2794 ExtraCC = AArch64CC::EQ; 2795 CC = AArch64CC::VS; 2796 break; 2797 case CmpInst::FCMP_ONE: 2798 ExtraCC = AArch64CC::MI; 2799 CC = AArch64CC::GT; 2800 break; 2801 } 2802 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2803 } else { 2804 unsigned CondReg = getRegForValue(Cond); 2805 if (!CondReg) 2806 return false; 2807 bool CondIsKill = hasTrivialKill(Cond); 2808 2809 const MCInstrDesc &II = TII.get(AArch64::ANDSWri); 2810 CondReg = constrainOperandRegClass(II, CondReg, 1); 2811 2812 // Emit a TST instruction (ANDS wzr, reg, #imm). 2813 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, 2814 AArch64::WZR) 2815 .addReg(CondReg, getKillRegState(CondIsKill)) 2816 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); 2817 } 2818 2819 unsigned Src1Reg = getRegForValue(SI->getTrueValue()); 2820 bool Src1IsKill = hasTrivialKill(SI->getTrueValue()); 2821 2822 unsigned Src2Reg = getRegForValue(SI->getFalseValue()); 2823 bool Src2IsKill = hasTrivialKill(SI->getFalseValue()); 2824 2825 if (!Src1Reg || !Src2Reg) 2826 return false; 2827 2828 if (ExtraCC != AArch64CC::AL) { 2829 Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg, 2830 Src2IsKill, ExtraCC); 2831 Src2IsKill = true; 2832 } 2833 unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg, 2834 Src2IsKill, CC); 2835 updateValueMap(I, ResultReg); 2836 return true; 2837 } 2838 2839 bool AArch64FastISel::selectFPExt(const Instruction *I) { 2840 Value *V = I->getOperand(0); 2841 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy()) 2842 return false; 2843 2844 unsigned Op = getRegForValue(V); 2845 if (Op == 0) 2846 return false; 2847 2848 unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass); 2849 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr), 2850 ResultReg).addReg(Op); 2851 updateValueMap(I, ResultReg); 2852 return true; 2853 } 2854 2855 bool AArch64FastISel::selectFPTrunc(const Instruction *I) { 2856 Value *V = I->getOperand(0); 2857 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy()) 2858 return false; 2859 2860 unsigned Op = getRegForValue(V); 2861 if (Op == 0) 2862 return false; 2863 2864 unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass); 2865 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr), 2866 
ResultReg).addReg(Op); 2867 updateValueMap(I, ResultReg); 2868 return true; 2869 } 2870 2871 // FPToUI and FPToSI 2872 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) { 2873 MVT DestVT; 2874 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) 2875 return false; 2876 2877 unsigned SrcReg = getRegForValue(I->getOperand(0)); 2878 if (SrcReg == 0) 2879 return false; 2880 2881 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); 2882 if (SrcVT == MVT::f128 || SrcVT == MVT::f16) 2883 return false; 2884 2885 unsigned Opc; 2886 if (SrcVT == MVT::f64) { 2887 if (Signed) 2888 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr; 2889 else 2890 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr; 2891 } else { 2892 if (Signed) 2893 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr; 2894 else 2895 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr; 2896 } 2897 unsigned ResultReg = createResultReg( 2898 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass); 2899 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) 2900 .addReg(SrcReg); 2901 updateValueMap(I, ResultReg); 2902 return true; 2903 } 2904 2905 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) { 2906 MVT DestVT; 2907 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) 2908 return false; 2909 // Let regular ISEL handle FP16 2910 if (DestVT == MVT::f16) 2911 return false; 2912 2913 assert((DestVT == MVT::f32 || DestVT == MVT::f64) && 2914 "Unexpected value type."); 2915 2916 unsigned SrcReg = getRegForValue(I->getOperand(0)); 2917 if (!SrcReg) 2918 return false; 2919 bool SrcIsKill = hasTrivialKill(I->getOperand(0)); 2920 2921 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); 2922 2923 // Handle sign-extension. 2924 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) { 2925 SrcReg = 2926 emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed); 2927 if (!SrcReg) 2928 return false; 2929 SrcIsKill = true; 2930 } 2931 2932 unsigned Opc; 2933 if (SrcVT == MVT::i64) { 2934 if (Signed) 2935 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri; 2936 else 2937 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri; 2938 } else { 2939 if (Signed) 2940 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri; 2941 else 2942 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri; 2943 } 2944 2945 unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg, 2946 SrcIsKill); 2947 updateValueMap(I, ResultReg); 2948 return true; 2949 } 2950 2951 bool AArch64FastISel::fastLowerArguments() { 2952 if (!FuncInfo.CanLowerReturn) 2953 return false; 2954 2955 const Function *F = FuncInfo.Fn; 2956 if (F->isVarArg()) 2957 return false; 2958 2959 CallingConv::ID CC = F->getCallingConv(); 2960 if (CC != CallingConv::C && CC != CallingConv::Swift) 2961 return false; 2962 2963 if (Subtarget->hasCustomCallingConv()) 2964 return false; 2965 2966 // Only handle simple cases of up to 8 GPR and FPR each. 
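  // This mirrors the AAPCS64 register assignment: the first eight integer
  // arguments go in x0-x7 (w0-w7) and the first eight FP/SIMD arguments in
  // v0-v7; anything that would be passed on the stack falls back to
  // SelectionDAG.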
2967 unsigned GPRCnt = 0; 2968 unsigned FPRCnt = 0; 2969 for (auto const &Arg : F->args()) { 2970 if (Arg.hasAttribute(Attribute::ByVal) || 2971 Arg.hasAttribute(Attribute::InReg) || 2972 Arg.hasAttribute(Attribute::StructRet) || 2973 Arg.hasAttribute(Attribute::SwiftSelf) || 2974 Arg.hasAttribute(Attribute::SwiftError) || 2975 Arg.hasAttribute(Attribute::Nest)) 2976 return false; 2977 2978 Type *ArgTy = Arg.getType(); 2979 if (ArgTy->isStructTy() || ArgTy->isArrayTy()) 2980 return false; 2981 2982 EVT ArgVT = TLI.getValueType(DL, ArgTy); 2983 if (!ArgVT.isSimple()) 2984 return false; 2985 2986 MVT VT = ArgVT.getSimpleVT().SimpleTy; 2987 if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8()) 2988 return false; 2989 2990 if (VT.isVector() && 2991 (!Subtarget->hasNEON() || !Subtarget->isLittleEndian())) 2992 return false; 2993 2994 if (VT >= MVT::i1 && VT <= MVT::i64) 2995 ++GPRCnt; 2996 else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() || 2997 VT.is128BitVector()) 2998 ++FPRCnt; 2999 else 3000 return false; 3001 3002 if (GPRCnt > 8 || FPRCnt > 8) 3003 return false; 3004 } 3005 3006 static const MCPhysReg Registers[6][8] = { 3007 { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4, 3008 AArch64::W5, AArch64::W6, AArch64::W7 }, 3009 { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4, 3010 AArch64::X5, AArch64::X6, AArch64::X7 }, 3011 { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4, 3012 AArch64::H5, AArch64::H6, AArch64::H7 }, 3013 { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4, 3014 AArch64::S5, AArch64::S6, AArch64::S7 }, 3015 { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4, 3016 AArch64::D5, AArch64::D6, AArch64::D7 }, 3017 { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4, 3018 AArch64::Q5, AArch64::Q6, AArch64::Q7 } 3019 }; 3020 3021 unsigned GPRIdx = 0; 3022 unsigned FPRIdx = 0; 3023 for (auto const &Arg : F->args()) { 3024 MVT VT = TLI.getSimpleValueType(DL, Arg.getType()); 3025 unsigned SrcReg; 3026 const TargetRegisterClass *RC; 3027 if (VT >= MVT::i1 && VT <= MVT::i32) { 3028 SrcReg = Registers[0][GPRIdx++]; 3029 RC = &AArch64::GPR32RegClass; 3030 VT = MVT::i32; 3031 } else if (VT == MVT::i64) { 3032 SrcReg = Registers[1][GPRIdx++]; 3033 RC = &AArch64::GPR64RegClass; 3034 } else if (VT == MVT::f16) { 3035 SrcReg = Registers[2][FPRIdx++]; 3036 RC = &AArch64::FPR16RegClass; 3037 } else if (VT == MVT::f32) { 3038 SrcReg = Registers[3][FPRIdx++]; 3039 RC = &AArch64::FPR32RegClass; 3040 } else if ((VT == MVT::f64) || VT.is64BitVector()) { 3041 SrcReg = Registers[4][FPRIdx++]; 3042 RC = &AArch64::FPR64RegClass; 3043 } else if (VT.is128BitVector()) { 3044 SrcReg = Registers[5][FPRIdx++]; 3045 RC = &AArch64::FPR128RegClass; 3046 } else 3047 llvm_unreachable("Unexpected value type."); 3048 3049 unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); 3050 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. 3051 // Without this, EmitLiveInCopies may eliminate the livein if its only 3052 // use is a bitcast (which isn't turned into an instruction). 
3053 unsigned ResultReg = createResultReg(RC); 3054 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3055 TII.get(TargetOpcode::COPY), ResultReg) 3056 .addReg(DstReg, getKillRegState(true)); 3057 updateValueMap(&Arg, ResultReg); 3058 } 3059 return true; 3060 } 3061 3062 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI, 3063 SmallVectorImpl<MVT> &OutVTs, 3064 unsigned &NumBytes) { 3065 CallingConv::ID CC = CLI.CallConv; 3066 SmallVector<CCValAssign, 16> ArgLocs; 3067 CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context); 3068 CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC)); 3069 3070 // Get a count of how many bytes are to be pushed on the stack. 3071 NumBytes = CCInfo.getNextStackOffset(); 3072 3073 // Issue CALLSEQ_START 3074 unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); 3075 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)) 3076 .addImm(NumBytes).addImm(0); 3077 3078 // Process the args. 3079 for (CCValAssign &VA : ArgLocs) { 3080 const Value *ArgVal = CLI.OutVals[VA.getValNo()]; 3081 MVT ArgVT = OutVTs[VA.getValNo()]; 3082 3083 unsigned ArgReg = getRegForValue(ArgVal); 3084 if (!ArgReg) 3085 return false; 3086 3087 // Handle arg promotion: SExt, ZExt, AExt. 3088 switch (VA.getLocInfo()) { 3089 case CCValAssign::Full: 3090 break; 3091 case CCValAssign::SExt: { 3092 MVT DestVT = VA.getLocVT(); 3093 MVT SrcVT = ArgVT; 3094 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false); 3095 if (!ArgReg) 3096 return false; 3097 break; 3098 } 3099 case CCValAssign::AExt: 3100 // Intentional fall-through. 3101 case CCValAssign::ZExt: { 3102 MVT DestVT = VA.getLocVT(); 3103 MVT SrcVT = ArgVT; 3104 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true); 3105 if (!ArgReg) 3106 return false; 3107 break; 3108 } 3109 default: 3110 llvm_unreachable("Unknown arg promotion!"); 3111 } 3112 3113 // Now copy/store arg to correct locations. 3114 if (VA.isRegLoc() && !VA.needsCustom()) { 3115 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3116 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg); 3117 CLI.OutRegs.push_back(VA.getLocReg()); 3118 } else if (VA.needsCustom()) { 3119 // FIXME: Handle custom args. 3120 return false; 3121 } else { 3122 assert(VA.isMemLoc() && "Assuming store on stack."); 3123 3124 // Don't emit stores for undef values. 3125 if (isa<UndefValue>(ArgVal)) 3126 continue; 3127 3128 // Need to store on the stack. 3129 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8; 3130 3131 unsigned BEAlign = 0; 3132 if (ArgSize < 8 && !Subtarget->isLittleEndian()) 3133 BEAlign = 8 - ArgSize; 3134 3135 Address Addr; 3136 Addr.setKind(Address::RegBase); 3137 Addr.setReg(AArch64::SP); 3138 Addr.setOffset(VA.getLocMemOffset() + BEAlign); 3139 3140 unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType()); 3141 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( 3142 MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()), 3143 MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment); 3144 3145 if (!emitStore(ArgVT, ArgReg, Addr, MMO)) 3146 return false; 3147 } 3148 } 3149 return true; 3150 } 3151 3152 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT, 3153 unsigned NumBytes) { 3154 CallingConv::ID CC = CLI.CallConv; 3155 3156 // Issue CALLSEQ_END 3157 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); 3158 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp)) 3159 .addImm(NumBytes).addImm(0); 3160 3161 // Now the return value. 
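  // AnalyzeCallResult reports which physical register holds the return
  // value; only the single-register case is handled, and the value is copied
  // into a fresh virtual register that becomes CLI.ResultReg.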
3162 if (RetVT != MVT::isVoid) { 3163 SmallVector<CCValAssign, 16> RVLocs; 3164 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context); 3165 CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC)); 3166 3167 // Only handle a single return value. 3168 if (RVLocs.size() != 1) 3169 return false; 3170 3171 // Copy all of the result registers out of their specified physreg. 3172 MVT CopyVT = RVLocs[0].getValVT(); 3173 3174 // TODO: Handle big-endian results 3175 if (CopyVT.isVector() && !Subtarget->isLittleEndian()) 3176 return false; 3177 3178 unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT)); 3179 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3180 TII.get(TargetOpcode::COPY), ResultReg) 3181 .addReg(RVLocs[0].getLocReg()); 3182 CLI.InRegs.push_back(RVLocs[0].getLocReg()); 3183 3184 CLI.ResultReg = ResultReg; 3185 CLI.NumResultRegs = 1; 3186 } 3187 3188 return true; 3189 } 3190 3191 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) { 3192 CallingConv::ID CC = CLI.CallConv; 3193 bool IsTailCall = CLI.IsTailCall; 3194 bool IsVarArg = CLI.IsVarArg; 3195 const Value *Callee = CLI.Callee; 3196 MCSymbol *Symbol = CLI.Symbol; 3197 3198 if (!Callee && !Symbol) 3199 return false; 3200 3201 // Allow SelectionDAG isel to handle tail calls. 3202 if (IsTailCall) 3203 return false; 3204 3205 // FIXME: we could and should support this, but for now correctness at -O0 is 3206 // more important. 3207 if (Subtarget->isTargetILP32()) 3208 return false; 3209 3210 CodeModel::Model CM = TM.getCodeModel(); 3211 // Only support the small-addressing and large code models. 3212 if (CM != CodeModel::Large && !Subtarget->useSmallAddressing()) 3213 return false; 3214 3215 // FIXME: Add large code model support for ELF. 3216 if (CM == CodeModel::Large && !Subtarget->isTargetMachO()) 3217 return false; 3218 3219 // Let SDISel handle vararg functions. 3220 if (IsVarArg) 3221 return false; 3222 3223 // FIXME: Only handle *simple* calls for now. 3224 MVT RetVT; 3225 if (CLI.RetTy->isVoidTy()) 3226 RetVT = MVT::isVoid; 3227 else if (!isTypeLegal(CLI.RetTy, RetVT)) 3228 return false; 3229 3230 for (auto Flag : CLI.OutFlags) 3231 if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() || 3232 Flag.isSwiftSelf() || Flag.isSwiftError()) 3233 return false; 3234 3235 // Set up the argument vectors. 3236 SmallVector<MVT, 16> OutVTs; 3237 OutVTs.reserve(CLI.OutVals.size()); 3238 3239 for (auto *Val : CLI.OutVals) { 3240 MVT VT; 3241 if (!isTypeLegal(Val->getType(), VT) && 3242 !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) 3243 return false; 3244 3245 // We don't handle vector parameters yet. 3246 if (VT.isVector() || VT.getSizeInBits() > 64) 3247 return false; 3248 3249 OutVTs.push_back(VT); 3250 } 3251 3252 Address Addr; 3253 if (Callee && !computeCallAddress(Callee, Addr)) 3254 return false; 3255 3256 // The weak function target may be zero; in that case we must use indirect 3257 // addressing via a stub on windows as it may be out of range for a 3258 // PC-relative jump. 3259 if (Subtarget->isTargetWindows() && Addr.getGlobalValue() && 3260 Addr.getGlobalValue()->hasExternalWeakLinkage()) 3261 return false; 3262 3263 // Handle the arguments now that we've gotten them. 3264 unsigned NumBytes; 3265 if (!processCallArgs(CLI, OutVTs, NumBytes)) 3266 return false; 3267 3268 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); 3269 if (RegInfo->isAnyArgRegReserved(*MF)) 3270 RegInfo->emitReservedArgRegCallError(*MF); 3271 3272 // Issue the call. 
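  // With small addressing this is a single BL (or BLR for an indirect
  // callee). In the large code model the callee address is materialized
  // first (ADRP + LDR through the GOT for an external symbol, materializeGV
  // for a global value) and the call is then made with BLR.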
3273 MachineInstrBuilder MIB; 3274 if (Subtarget->useSmallAddressing()) { 3275 const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL); 3276 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II); 3277 if (Symbol) 3278 MIB.addSym(Symbol, 0); 3279 else if (Addr.getGlobalValue()) 3280 MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0); 3281 else if (Addr.getReg()) { 3282 unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0); 3283 MIB.addReg(Reg); 3284 } else 3285 return false; 3286 } else { 3287 unsigned CallReg = 0; 3288 if (Symbol) { 3289 unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); 3290 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), 3291 ADRPReg) 3292 .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE); 3293 3294 CallReg = createResultReg(&AArch64::GPR64RegClass); 3295 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3296 TII.get(AArch64::LDRXui), CallReg) 3297 .addReg(ADRPReg) 3298 .addSym(Symbol, 3299 AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 3300 } else if (Addr.getGlobalValue()) 3301 CallReg = materializeGV(Addr.getGlobalValue()); 3302 else if (Addr.getReg()) 3303 CallReg = Addr.getReg(); 3304 3305 if (!CallReg) 3306 return false; 3307 3308 const MCInstrDesc &II = TII.get(AArch64::BLR); 3309 CallReg = constrainOperandRegClass(II, CallReg, 0); 3310 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg); 3311 } 3312 3313 // Add implicit physical register uses to the call. 3314 for (auto Reg : CLI.OutRegs) 3315 MIB.addReg(Reg, RegState::Implicit); 3316 3317 // Add a register mask with the call-preserved registers. 3318 // Proper defs for return values will be added by setPhysRegsDeadExcept(). 3319 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); 3320 3321 CLI.Call = MIB; 3322 3323 // Finish off the call including any return values. 3324 return finishCall(CLI, RetVT, NumBytes); 3325 } 3326 3327 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) { 3328 if (Alignment) 3329 return Len / Alignment <= 4; 3330 else 3331 return Len < 32; 3332 } 3333 3334 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src, 3335 uint64_t Len, unsigned Alignment) { 3336 // Make sure we don't bloat code by inlining very large memcpy's. 3337 if (!isMemCpySmall(Len, Alignment)) 3338 return false; 3339 3340 int64_t UnscaledOffset = 0; 3341 Address OrigDest = Dest; 3342 Address OrigSrc = Src; 3343 3344 while (Len) { 3345 MVT VT; 3346 if (!Alignment || Alignment >= 8) { 3347 if (Len >= 8) 3348 VT = MVT::i64; 3349 else if (Len >= 4) 3350 VT = MVT::i32; 3351 else if (Len >= 2) 3352 VT = MVT::i16; 3353 else { 3354 VT = MVT::i8; 3355 } 3356 } else { 3357 // Bound based on alignment. 3358 if (Len >= 4 && Alignment == 4) 3359 VT = MVT::i32; 3360 else if (Len >= 2 && Alignment == 2) 3361 VT = MVT::i16; 3362 else { 3363 VT = MVT::i8; 3364 } 3365 } 3366 3367 unsigned ResultReg = emitLoad(VT, VT, Src); 3368 if (!ResultReg) 3369 return false; 3370 3371 if (!emitStore(VT, ResultReg, Dest)) 3372 return false; 3373 3374 int64_t Size = VT.getSizeInBits() / 8; 3375 Len -= Size; 3376 UnscaledOffset += Size; 3377 3378 // We need to recompute the unscaled offset for each iteration. 3379 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset); 3380 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset); 3381 } 3382 3383 return true; 3384 } 3385 3386 /// Check if it is possible to fold the condition from the XALU intrinsic 3387 /// into the user. 
The condition code will only be updated on success. 3388 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC, 3389 const Instruction *I, 3390 const Value *Cond) { 3391 if (!isa<ExtractValueInst>(Cond)) 3392 return false; 3393 3394 const auto *EV = cast<ExtractValueInst>(Cond); 3395 if (!isa<IntrinsicInst>(EV->getAggregateOperand())) 3396 return false; 3397 3398 const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand()); 3399 MVT RetVT; 3400 const Function *Callee = II->getCalledFunction(); 3401 Type *RetTy = 3402 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U); 3403 if (!isTypeLegal(RetTy, RetVT)) 3404 return false; 3405 3406 if (RetVT != MVT::i32 && RetVT != MVT::i64) 3407 return false; 3408 3409 const Value *LHS = II->getArgOperand(0); 3410 const Value *RHS = II->getArgOperand(1); 3411 3412 // Canonicalize immediate to the RHS. 3413 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && 3414 isCommutativeIntrinsic(II)) 3415 std::swap(LHS, RHS); 3416 3417 // Simplify multiplies. 3418 Intrinsic::ID IID = II->getIntrinsicID(); 3419 switch (IID) { 3420 default: 3421 break; 3422 case Intrinsic::smul_with_overflow: 3423 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3424 if (C->getValue() == 2) 3425 IID = Intrinsic::sadd_with_overflow; 3426 break; 3427 case Intrinsic::umul_with_overflow: 3428 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3429 if (C->getValue() == 2) 3430 IID = Intrinsic::uadd_with_overflow; 3431 break; 3432 } 3433 3434 AArch64CC::CondCode TmpCC; 3435 switch (IID) { 3436 default: 3437 return false; 3438 case Intrinsic::sadd_with_overflow: 3439 case Intrinsic::ssub_with_overflow: 3440 TmpCC = AArch64CC::VS; 3441 break; 3442 case Intrinsic::uadd_with_overflow: 3443 TmpCC = AArch64CC::HS; 3444 break; 3445 case Intrinsic::usub_with_overflow: 3446 TmpCC = AArch64CC::LO; 3447 break; 3448 case Intrinsic::smul_with_overflow: 3449 case Intrinsic::umul_with_overflow: 3450 TmpCC = AArch64CC::NE; 3451 break; 3452 } 3453 3454 // Check if both instructions are in the same basic block. 3455 if (!isValueAvailable(II)) 3456 return false; 3457 3458 // Make sure nothing is in the way 3459 BasicBlock::const_iterator Start(I); 3460 BasicBlock::const_iterator End(II); 3461 for (auto Itr = std::prev(Start); Itr != End; --Itr) { 3462 // We only expect extractvalue instructions between the intrinsic and the 3463 // instruction to be selected. 3464 if (!isa<ExtractValueInst>(Itr)) 3465 return false; 3466 3467 // Check that the extractvalue operand comes from the intrinsic. 3468 const auto *EVI = cast<ExtractValueInst>(Itr); 3469 if (EVI->getAggregateOperand() != II) 3470 return false; 3471 } 3472 3473 CC = TmpCC; 3474 return true; 3475 } 3476 3477 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { 3478 // FIXME: Handle more intrinsics. 3479 switch (II->getIntrinsicID()) { 3480 default: return false; 3481 case Intrinsic::frameaddress: { 3482 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo(); 3483 MFI.setFrameAddressIsTaken(true); 3484 3485 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); 3486 Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF)); 3487 Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 3488 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3489 TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr); 3490 // Recursively load frame address 3491 // ldr x0, [fp] 3492 // ldr x0, [x0] 3493 // ldr x0, [x0] 3494 // ... 
3495 unsigned DestReg; 3496 unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue(); 3497 while (Depth--) { 3498 DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass, 3499 SrcReg, /*IsKill=*/true, 0); 3500 assert(DestReg && "Unexpected LDR instruction emission failure."); 3501 SrcReg = DestReg; 3502 } 3503 3504 updateValueMap(II, SrcReg); 3505 return true; 3506 } 3507 case Intrinsic::sponentry: { 3508 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo(); 3509 3510 // SP = FP + Fixed Object + 16 3511 int FI = MFI.CreateFixedObject(4, 0, false); 3512 unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass); 3513 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3514 TII.get(AArch64::ADDXri), ResultReg) 3515 .addFrameIndex(FI) 3516 .addImm(0) 3517 .addImm(0); 3518 3519 updateValueMap(II, ResultReg); 3520 return true; 3521 } 3522 case Intrinsic::memcpy: 3523 case Intrinsic::memmove: { 3524 const auto *MTI = cast<MemTransferInst>(II); 3525 // Don't handle volatile. 3526 if (MTI->isVolatile()) 3527 return false; 3528 3529 // Disable inlining for memmove before calls to ComputeAddress. Otherwise, 3530 // we would emit dead code because we don't currently handle memmoves. 3531 bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy); 3532 if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) { 3533 // Small memcpy's are common enough that we want to do them without a call 3534 // if possible. 3535 uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue(); 3536 unsigned Alignment = MinAlign(MTI->getDestAlignment(), 3537 MTI->getSourceAlignment()); 3538 if (isMemCpySmall(Len, Alignment)) { 3539 Address Dest, Src; 3540 if (!computeAddress(MTI->getRawDest(), Dest) || 3541 !computeAddress(MTI->getRawSource(), Src)) 3542 return false; 3543 if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment)) 3544 return true; 3545 } 3546 } 3547 3548 if (!MTI->getLength()->getType()->isIntegerTy(64)) 3549 return false; 3550 3551 if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255) 3552 // Fast instruction selection doesn't support the special 3553 // address spaces. 3554 return false; 3555 3556 const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove"; 3557 return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 1); 3558 } 3559 case Intrinsic::memset: { 3560 const MemSetInst *MSI = cast<MemSetInst>(II); 3561 // Don't handle volatile. 3562 if (MSI->isVolatile()) 3563 return false; 3564 3565 if (!MSI->getLength()->getType()->isIntegerTy(64)) 3566 return false; 3567 3568 if (MSI->getDestAddressSpace() > 255) 3569 // Fast instruction selection doesn't support the special 3570 // address spaces. 
3571 return false; 3572 3573 return lowerCallTo(II, "memset", II->getNumArgOperands() - 1); 3574 } 3575 case Intrinsic::sin: 3576 case Intrinsic::cos: 3577 case Intrinsic::pow: { 3578 MVT RetVT; 3579 if (!isTypeLegal(II->getType(), RetVT)) 3580 return false; 3581 3582 if (RetVT != MVT::f32 && RetVT != MVT::f64) 3583 return false; 3584 3585 static const RTLIB::Libcall LibCallTable[3][2] = { 3586 { RTLIB::SIN_F32, RTLIB::SIN_F64 }, 3587 { RTLIB::COS_F32, RTLIB::COS_F64 }, 3588 { RTLIB::POW_F32, RTLIB::POW_F64 } 3589 }; 3590 RTLIB::Libcall LC; 3591 bool Is64Bit = RetVT == MVT::f64; 3592 switch (II->getIntrinsicID()) { 3593 default: 3594 llvm_unreachable("Unexpected intrinsic."); 3595 case Intrinsic::sin: 3596 LC = LibCallTable[0][Is64Bit]; 3597 break; 3598 case Intrinsic::cos: 3599 LC = LibCallTable[1][Is64Bit]; 3600 break; 3601 case Intrinsic::pow: 3602 LC = LibCallTable[2][Is64Bit]; 3603 break; 3604 } 3605 3606 ArgListTy Args; 3607 Args.reserve(II->getNumArgOperands()); 3608 3609 // Populate the argument list. 3610 for (auto &Arg : II->arg_operands()) { 3611 ArgListEntry Entry; 3612 Entry.Val = Arg; 3613 Entry.Ty = Arg->getType(); 3614 Args.push_back(Entry); 3615 } 3616 3617 CallLoweringInfo CLI; 3618 MCContext &Ctx = MF->getContext(); 3619 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(), 3620 TLI.getLibcallName(LC), std::move(Args)); 3621 if (!lowerCallTo(CLI)) 3622 return false; 3623 updateValueMap(II, CLI.ResultReg); 3624 return true; 3625 } 3626 case Intrinsic::fabs: { 3627 MVT VT; 3628 if (!isTypeLegal(II->getType(), VT)) 3629 return false; 3630 3631 unsigned Opc; 3632 switch (VT.SimpleTy) { 3633 default: 3634 return false; 3635 case MVT::f32: 3636 Opc = AArch64::FABSSr; 3637 break; 3638 case MVT::f64: 3639 Opc = AArch64::FABSDr; 3640 break; 3641 } 3642 unsigned SrcReg = getRegForValue(II->getOperand(0)); 3643 if (!SrcReg) 3644 return false; 3645 bool SrcRegIsKill = hasTrivialKill(II->getOperand(0)); 3646 unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); 3647 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) 3648 .addReg(SrcReg, getKillRegState(SrcRegIsKill)); 3649 updateValueMap(II, ResultReg); 3650 return true; 3651 } 3652 case Intrinsic::trap: 3653 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK)) 3654 .addImm(1); 3655 return true; 3656 case Intrinsic::debugtrap: { 3657 if (Subtarget->isTargetWindows()) { 3658 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK)) 3659 .addImm(0xF000); 3660 return true; 3661 } 3662 break; 3663 } 3664 3665 case Intrinsic::sqrt: { 3666 Type *RetTy = II->getCalledFunction()->getReturnType(); 3667 3668 MVT VT; 3669 if (!isTypeLegal(RetTy, VT)) 3670 return false; 3671 3672 unsigned Op0Reg = getRegForValue(II->getOperand(0)); 3673 if (!Op0Reg) 3674 return false; 3675 bool Op0IsKill = hasTrivialKill(II->getOperand(0)); 3676 3677 unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill); 3678 if (!ResultReg) 3679 return false; 3680 3681 updateValueMap(II, ResultReg); 3682 return true; 3683 } 3684 case Intrinsic::sadd_with_overflow: 3685 case Intrinsic::uadd_with_overflow: 3686 case Intrinsic::ssub_with_overflow: 3687 case Intrinsic::usub_with_overflow: 3688 case Intrinsic::smul_with_overflow: 3689 case Intrinsic::umul_with_overflow: { 3690 // This implements the basic lowering of the xalu with overflow intrinsics. 
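    // For illustration only (register numbers are arbitrary and chosen by
    // fast-isel), an i32 llvm.sadd.with.overflow ends up roughly as:
    //   adds w8, w0, w1   // value result, sets NZCV
    //   cset w9, vs       // overflow bit, via CSINC with the inverted CC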
3691 const Function *Callee = II->getCalledFunction(); 3692 auto *Ty = cast<StructType>(Callee->getReturnType()); 3693 Type *RetTy = Ty->getTypeAtIndex(0U); 3694 3695 MVT VT; 3696 if (!isTypeLegal(RetTy, VT)) 3697 return false; 3698 3699 if (VT != MVT::i32 && VT != MVT::i64) 3700 return false; 3701 3702 const Value *LHS = II->getArgOperand(0); 3703 const Value *RHS = II->getArgOperand(1); 3704 // Canonicalize immediate to the RHS. 3705 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && 3706 isCommutativeIntrinsic(II)) 3707 std::swap(LHS, RHS); 3708 3709 // Simplify multiplies. 3710 Intrinsic::ID IID = II->getIntrinsicID(); 3711 switch (IID) { 3712 default: 3713 break; 3714 case Intrinsic::smul_with_overflow: 3715 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3716 if (C->getValue() == 2) { 3717 IID = Intrinsic::sadd_with_overflow; 3718 RHS = LHS; 3719 } 3720 break; 3721 case Intrinsic::umul_with_overflow: 3722 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3723 if (C->getValue() == 2) { 3724 IID = Intrinsic::uadd_with_overflow; 3725 RHS = LHS; 3726 } 3727 break; 3728 } 3729 3730 unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0; 3731 AArch64CC::CondCode CC = AArch64CC::Invalid; 3732 switch (IID) { 3733 default: llvm_unreachable("Unexpected intrinsic!"); 3734 case Intrinsic::sadd_with_overflow: 3735 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true); 3736 CC = AArch64CC::VS; 3737 break; 3738 case Intrinsic::uadd_with_overflow: 3739 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true); 3740 CC = AArch64CC::HS; 3741 break; 3742 case Intrinsic::ssub_with_overflow: 3743 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true); 3744 CC = AArch64CC::VS; 3745 break; 3746 case Intrinsic::usub_with_overflow: 3747 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true); 3748 CC = AArch64CC::LO; 3749 break; 3750 case Intrinsic::smul_with_overflow: { 3751 CC = AArch64CC::NE; 3752 unsigned LHSReg = getRegForValue(LHS); 3753 if (!LHSReg) 3754 return false; 3755 bool LHSIsKill = hasTrivialKill(LHS); 3756 3757 unsigned RHSReg = getRegForValue(RHS); 3758 if (!RHSReg) 3759 return false; 3760 bool RHSIsKill = hasTrivialKill(RHS); 3761 3762 if (VT == MVT::i32) { 3763 MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill); 3764 unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg, 3765 /*IsKill=*/false, 32); 3766 MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true, 3767 AArch64::sub_32); 3768 ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true, 3769 AArch64::sub_32); 3770 emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false, 3771 AArch64_AM::ASR, 31, /*WantResult=*/false); 3772 } else { 3773 assert(VT == MVT::i64 && "Unexpected value type."); 3774 // LHSReg and RHSReg cannot be killed by this Mul, since they are 3775 // reused in the next instruction. 
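        // (Sketch of the check emitted for the i64 case: the high half of the
        // product is compared against the sign bits of the low half, e.g.
        //   mul   x8, x0, x1
        //   smulh x9, x0, x1
        //   cmp   x9, x8, asr #63
        // so the NE condition is set exactly when the multiply overflowed.)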
3776 MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg, 3777 /*IsKill=*/false); 3778 unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill, 3779 RHSReg, RHSIsKill); 3780 emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false, 3781 AArch64_AM::ASR, 63, /*WantResult=*/false); 3782 } 3783 break; 3784 } 3785 case Intrinsic::umul_with_overflow: { 3786 CC = AArch64CC::NE; 3787 unsigned LHSReg = getRegForValue(LHS); 3788 if (!LHSReg) 3789 return false; 3790 bool LHSIsKill = hasTrivialKill(LHS); 3791 3792 unsigned RHSReg = getRegForValue(RHS); 3793 if (!RHSReg) 3794 return false; 3795 bool RHSIsKill = hasTrivialKill(RHS); 3796 3797 if (VT == MVT::i32) { 3798 MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill); 3799 emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg, 3800 /*IsKill=*/false, AArch64_AM::LSR, 32, 3801 /*WantResult=*/false); 3802 MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true, 3803 AArch64::sub_32); 3804 } else { 3805 assert(VT == MVT::i64 && "Unexpected value type."); 3806 // LHSReg and RHSReg cannot be killed by this Mul, since they are 3807 // reused in the next instruction. 3808 MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg, 3809 /*IsKill=*/false); 3810 unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill, 3811 RHSReg, RHSIsKill); 3812 emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg, 3813 /*IsKill=*/false, /*WantResult=*/false); 3814 } 3815 break; 3816 } 3817 } 3818 3819 if (MulReg) { 3820 ResultReg1 = createResultReg(TLI.getRegClassFor(VT)); 3821 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3822 TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg); 3823 } 3824 3825 if (!ResultReg1) 3826 return false; 3827 3828 ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass, 3829 AArch64::WZR, /*IsKill=*/true, AArch64::WZR, 3830 /*IsKill=*/true, getInvertedCondCode(CC)); 3831 (void)ResultReg2; 3832 assert((ResultReg1 + 1) == ResultReg2 && 3833 "Nonconsecutive result registers."); 3834 updateValueMap(II, ResultReg1, 2); 3835 return true; 3836 } 3837 } 3838 return false; 3839 } 3840 3841 bool AArch64FastISel::selectRet(const Instruction *I) { 3842 const ReturnInst *Ret = cast<ReturnInst>(I); 3843 const Function &F = *I->getParent()->getParent(); 3844 3845 if (!FuncInfo.CanLowerReturn) 3846 return false; 3847 3848 if (F.isVarArg()) 3849 return false; 3850 3851 if (TLI.supportSwiftError() && 3852 F.getAttributes().hasAttrSomewhere(Attribute::SwiftError)) 3853 return false; 3854 3855 if (TLI.supportSplitCSR(FuncInfo.MF)) 3856 return false; 3857 3858 // Build a list of return value registers. 3859 SmallVector<unsigned, 4> RetRegs; 3860 3861 if (Ret->getNumOperands() > 0) { 3862 CallingConv::ID CC = F.getCallingConv(); 3863 SmallVector<ISD::OutputArg, 4> Outs; 3864 GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL); 3865 3866 // Analyze operands of the call, assigning locations to each operand. 3867 SmallVector<CCValAssign, 16> ValLocs; 3868 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext()); 3869 CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS 3870 : RetCC_AArch64_AAPCS; 3871 CCInfo.AnalyzeReturn(Outs, RetCC); 3872 3873 // Only handle a single return value for now. 3874 if (ValLocs.size() != 1) 3875 return false; 3876 3877 CCValAssign &VA = ValLocs[0]; 3878 const Value *RV = Ret->getOperand(0); 3879 3880 // Don't bother handling odd stuff for now. 
3881     if ((VA.getLocInfo() != CCValAssign::Full) &&
3882         (VA.getLocInfo() != CCValAssign::BCvt))
3883       return false;
3884 
3885     // Only handle register returns for now.
3886     if (!VA.isRegLoc())
3887       return false;
3888 
3889     unsigned Reg = getRegForValue(RV);
3890     if (Reg == 0)
3891       return false;
3892 
3893     unsigned SrcReg = Reg + VA.getValNo();
3894     Register DestReg = VA.getLocReg();
3895     // Avoid a cross-class copy. This is very unlikely.
3896     if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3897       return false;
3898 
3899     EVT RVEVT = TLI.getValueType(DL, RV->getType());
3900     if (!RVEVT.isSimple())
3901       return false;
3902 
3903     // Vectors (of > 1 lane) in big endian need tricky handling.
3904     if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
3905         !Subtarget->isLittleEndian())
3906       return false;
3907 
3908     MVT RVVT = RVEVT.getSimpleVT();
3909     if (RVVT == MVT::f128)
3910       return false;
3911 
3912     MVT DestVT = VA.getValVT();
3913     // Special handling for extended integers.
3914     if (RVVT != DestVT) {
3915       if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3916         return false;
3917 
3918       if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3919         return false;
3920 
3921       bool IsZExt = Outs[0].Flags.isZExt();
3922       SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3923       if (SrcReg == 0)
3924         return false;
3925     }
3926 
3927     // The "callee" (i.e., the value producer) zero-extends pointers at the
3928     // function boundary.
3929     if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
3930       SrcReg = emitAnd_ri(MVT::i64, SrcReg, false, 0xffffffff);
3931 
3932     // Make the copy.
3933     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3934             TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3935 
3936     // Add register to return instruction.
3937     RetRegs.push_back(VA.getLocReg());
3938   }
3939 
3940   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3941                                     TII.get(AArch64::RET_ReallyLR));
3942   for (unsigned RetReg : RetRegs)
3943     MIB.addReg(RetReg, RegState::Implicit);
3944   return true;
3945 }
3946 
3947 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3948   Type *DestTy = I->getType();
3949   Value *Op = I->getOperand(0);
3950   Type *SrcTy = Op->getType();
3951 
3952   EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3953   EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3954   if (!SrcEVT.isSimple())
3955     return false;
3956   if (!DestEVT.isSimple())
3957     return false;
3958 
3959   MVT SrcVT = SrcEVT.getSimpleVT();
3960   MVT DestVT = DestEVT.getSimpleVT();
3961 
3962   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3963       SrcVT != MVT::i8)
3964     return false;
3965   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3966       DestVT != MVT::i1)
3967     return false;
3968 
3969   unsigned SrcReg = getRegForValue(Op);
3970   if (!SrcReg)
3971     return false;
3972   bool SrcIsKill = hasTrivialKill(Op);
3973 
3974   // If we're truncating from i64 to a smaller non-legal type, then generate an
3975   // AND. Otherwise, we know the high bits are undefined and a truncate only
3976   // generates a COPY. We cannot also mark the source register as the result
3977   // register, because this can incorrectly transfer the kill flag onto the
3978   // source register.
3979   unsigned ResultReg;
3980   if (SrcVT == MVT::i64) {
3981     uint64_t Mask = 0;
3982     switch (DestVT.SimpleTy) {
3983     default:
3984       // Trunc i64 to i32 is handled by the target-independent fast-isel.
3985 return false; 3986 case MVT::i1: 3987 Mask = 0x1; 3988 break; 3989 case MVT::i8: 3990 Mask = 0xff; 3991 break; 3992 case MVT::i16: 3993 Mask = 0xffff; 3994 break; 3995 } 3996 // Issue an extract_subreg to get the lower 32-bits. 3997 unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill, 3998 AArch64::sub_32); 3999 // Create the AND instruction which performs the actual truncation. 4000 ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask); 4001 assert(ResultReg && "Unexpected AND instruction emission failure."); 4002 } else { 4003 ResultReg = createResultReg(&AArch64::GPR32RegClass); 4004 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4005 TII.get(TargetOpcode::COPY), ResultReg) 4006 .addReg(SrcReg, getKillRegState(SrcIsKill)); 4007 } 4008 4009 updateValueMap(I, ResultReg); 4010 return true; 4011 } 4012 4013 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) { 4014 assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 || 4015 DestVT == MVT::i64) && 4016 "Unexpected value type."); 4017 // Handle i8 and i16 as i32. 4018 if (DestVT == MVT::i8 || DestVT == MVT::i16) 4019 DestVT = MVT::i32; 4020 4021 if (IsZExt) { 4022 unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1); 4023 assert(ResultReg && "Unexpected AND instruction emission failure."); 4024 if (DestVT == MVT::i64) { 4025 // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the 4026 // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd. 4027 Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 4028 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4029 TII.get(AArch64::SUBREG_TO_REG), Reg64) 4030 .addImm(0) 4031 .addReg(ResultReg) 4032 .addImm(AArch64::sub_32); 4033 ResultReg = Reg64; 4034 } 4035 return ResultReg; 4036 } else { 4037 if (DestVT == MVT::i64) { 4038 // FIXME: We're SExt i1 to i64. 4039 return 0; 4040 } 4041 return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg, 4042 /*TODO:IsKill=*/false, 0, 0); 4043 } 4044 } 4045 4046 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, 4047 unsigned Op1, bool Op1IsKill) { 4048 unsigned Opc, ZReg; 4049 switch (RetVT.SimpleTy) { 4050 default: return 0; 4051 case MVT::i8: 4052 case MVT::i16: 4053 case MVT::i32: 4054 RetVT = MVT::i32; 4055 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break; 4056 case MVT::i64: 4057 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break; 4058 } 4059 4060 const TargetRegisterClass *RC = 4061 (RetVT == MVT::i64) ? 
&AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4062   return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
4063                           ZReg, /*IsKill=*/true);
4064 }
4065 
4066 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
4067                                        unsigned Op1, bool Op1IsKill) {
4068   if (RetVT != MVT::i64)
4069     return 0;
4070 
4071   return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
4072                           Op0, Op0IsKill, Op1, Op1IsKill,
4073                           AArch64::XZR, /*IsKill=*/true);
4074 }
4075 
4076 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
4077                                        unsigned Op1, bool Op1IsKill) {
4078   if (RetVT != MVT::i64)
4079     return 0;
4080 
4081   return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
4082                           Op0, Op0IsKill, Op1, Op1IsKill,
4083                           AArch64::XZR, /*IsKill=*/true);
4084 }
4085 
4086 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4087                                      unsigned Op1Reg, bool Op1IsKill) {
4088   unsigned Opc = 0;
4089   bool NeedTrunc = false;
4090   uint64_t Mask = 0;
4091   switch (RetVT.SimpleTy) {
4092   default: return 0;
4093   case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
4094   case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4095   case MVT::i32: Opc = AArch64::LSLVWr; break;
4096   case MVT::i64: Opc = AArch64::LSLVXr; break;
4097   }
4098 
4099   const TargetRegisterClass *RC =
4100       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4101   if (NeedTrunc) {
4102     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4103     Op1IsKill = true;
4104   }
4105   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4106                                        Op1IsKill);
4107   if (NeedTrunc)
4108     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4109   return ResultReg;
4110 }
4111 
4112 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4113                                      bool Op0IsKill, uint64_t Shift,
4114                                      bool IsZExt) {
4115   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4116          "Unexpected source/return type pair.");
4117   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4118           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4119          "Unexpected source value type.");
4120   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4121           RetVT == MVT::i64) && "Unexpected return value type.");
4122 
4123   bool Is64Bit = (RetVT == MVT::i64);
4124   unsigned RegSize = Is64Bit ? 64 : 32;
4125   unsigned DstBits = RetVT.getSizeInBits();
4126   unsigned SrcBits = SrcVT.getSizeInBits();
4127   const TargetRegisterClass *RC =
4128       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4129 
4130   // Just emit a copy for "zero" shifts.
4131   if (Shift == 0) {
4132     if (RetVT == SrcVT) {
4133       unsigned ResultReg = createResultReg(RC);
4134       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4135               TII.get(TargetOpcode::COPY), ResultReg)
4136           .addReg(Op0, getKillRegState(Op0IsKill));
4137       return ResultReg;
4138     } else
4139       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4140   }
4141 
4142   // Don't deal with undefined shifts.
4143   if (Shift >= DstBits)
4144     return 0;
4145 
4146   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4147 // {S|U}BFM Wd, Wn, #r, #s 4148 // Wd<32+s-r,32-r> = Wn<s:0> when r > s 4149 4150 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4151 // %2 = shl i16 %1, 4 4152 // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7 4153 // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext 4154 // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext 4155 // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext 4156 4157 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4158 // %2 = shl i16 %1, 8 4159 // Wd<32+7-24,32-24> = Wn<7:0> 4160 // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext 4161 // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext 4162 // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext 4163 4164 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4165 // %2 = shl i16 %1, 12 4166 // Wd<32+3-20,32-20> = Wn<3:0> 4167 // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext 4168 // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext 4169 // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext 4170 4171 unsigned ImmR = RegSize - Shift; 4172 // Limit the width to the length of the source type. 4173 unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift); 4174 static const unsigned OpcTable[2][2] = { 4175 {AArch64::SBFMWri, AArch64::SBFMXri}, 4176 {AArch64::UBFMWri, AArch64::UBFMXri} 4177 }; 4178 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4179 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4180 Register TmpReg = MRI.createVirtualRegister(RC); 4181 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4182 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4183 .addImm(0) 4184 .addReg(Op0, getKillRegState(Op0IsKill)) 4185 .addImm(AArch64::sub_32); 4186 Op0 = TmpReg; 4187 Op0IsKill = true; 4188 } 4189 return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS); 4190 } 4191 4192 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, 4193 unsigned Op1Reg, bool Op1IsKill) { 4194 unsigned Opc = 0; 4195 bool NeedTrunc = false; 4196 uint64_t Mask = 0; 4197 switch (RetVT.SimpleTy) { 4198 default: return 0; 4199 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break; 4200 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break; 4201 case MVT::i32: Opc = AArch64::LSRVWr; break; 4202 case MVT::i64: Opc = AArch64::LSRVXr; break; 4203 } 4204 4205 const TargetRegisterClass *RC = 4206 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4207 if (NeedTrunc) { 4208 Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask); 4209 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask); 4210 Op0IsKill = Op1IsKill = true; 4211 } 4212 unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg, 4213 Op1IsKill); 4214 if (NeedTrunc) 4215 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); 4216 return ResultReg; 4217 } 4218 4219 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 4220 bool Op0IsKill, uint64_t Shift, 4221 bool IsZExt) { 4222 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 4223 "Unexpected source/return type pair."); 4224 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 4225 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 4226 "Unexpected source value type."); 4227 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 4228 RetVT == MVT::i64) && "Unexpected return value type."); 4229 4230 bool Is64Bit = (RetVT == MVT::i64); 4231 unsigned RegSize = Is64Bit ? 
64 : 32; 4232 unsigned DstBits = RetVT.getSizeInBits(); 4233 unsigned SrcBits = SrcVT.getSizeInBits(); 4234 const TargetRegisterClass *RC = 4235 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4236 4237 // Just emit a copy for "zero" shifts. 4238 if (Shift == 0) { 4239 if (RetVT == SrcVT) { 4240 unsigned ResultReg = createResultReg(RC); 4241 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4242 TII.get(TargetOpcode::COPY), ResultReg) 4243 .addReg(Op0, getKillRegState(Op0IsKill)); 4244 return ResultReg; 4245 } else 4246 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4247 } 4248 4249 // Don't deal with undefined shifts. 4250 if (Shift >= DstBits) 4251 return 0; 4252 4253 // For immediate shifts we can fold the zero-/sign-extension into the shift. 4254 // {S|U}BFM Wd, Wn, #r, #s 4255 // Wd<s-r:0> = Wn<s:r> when r <= s 4256 4257 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4258 // %2 = lshr i16 %1, 4 4259 // Wd<7-4:0> = Wn<7:4> 4260 // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext 4261 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext 4262 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext 4263 4264 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4265 // %2 = lshr i16 %1, 8 4266 // Wd<7-7,0> = Wn<7:7> 4267 // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext 4268 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4269 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4270 4271 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4272 // %2 = lshr i16 %1, 12 4273 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 4274 // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext 4275 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4276 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4277 4278 if (Shift >= SrcBits && IsZExt) 4279 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT); 4280 4281 // It is not possible to fold a sign-extend into the LShr instruction. In this 4282 // case emit a sign-extend. 4283 if (!IsZExt) { 4284 Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4285 if (!Op0) 4286 return 0; 4287 Op0IsKill = true; 4288 SrcVT = RetVT; 4289 SrcBits = SrcVT.getSizeInBits(); 4290 IsZExt = true; 4291 } 4292 4293 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift); 4294 unsigned ImmS = SrcBits - 1; 4295 static const unsigned OpcTable[2][2] = { 4296 {AArch64::SBFMWri, AArch64::SBFMXri}, 4297 {AArch64::UBFMWri, AArch64::UBFMXri} 4298 }; 4299 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4300 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4301 Register TmpReg = MRI.createVirtualRegister(RC); 4302 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4303 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4304 .addImm(0) 4305 .addReg(Op0, getKillRegState(Op0IsKill)) 4306 .addImm(AArch64::sub_32); 4307 Op0 = TmpReg; 4308 Op0IsKill = true; 4309 } 4310 return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS); 4311 } 4312 4313 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, 4314 unsigned Op1Reg, bool Op1IsKill) { 4315 unsigned Opc = 0; 4316 bool NeedTrunc = false; 4317 uint64_t Mask = 0; 4318 switch (RetVT.SimpleTy) { 4319 default: return 0; 4320 case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break; 4321 case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break; 4322 case MVT::i32: Opc = AArch64::ASRVWr; break; 4323 case MVT::i64: Opc = AArch64::ASRVXr; break; 4324 } 4325 4326 const TargetRegisterClass *RC = 4327 (RetVT == MVT::i64) ? 
&AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4328 if (NeedTrunc) { 4329 Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false); 4330 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask); 4331 Op0IsKill = Op1IsKill = true; 4332 } 4333 unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg, 4334 Op1IsKill); 4335 if (NeedTrunc) 4336 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); 4337 return ResultReg; 4338 } 4339 4340 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 4341 bool Op0IsKill, uint64_t Shift, 4342 bool IsZExt) { 4343 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 4344 "Unexpected source/return type pair."); 4345 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 4346 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 4347 "Unexpected source value type."); 4348 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 4349 RetVT == MVT::i64) && "Unexpected return value type."); 4350 4351 bool Is64Bit = (RetVT == MVT::i64); 4352 unsigned RegSize = Is64Bit ? 64 : 32; 4353 unsigned DstBits = RetVT.getSizeInBits(); 4354 unsigned SrcBits = SrcVT.getSizeInBits(); 4355 const TargetRegisterClass *RC = 4356 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4357 4358 // Just emit a copy for "zero" shifts. 4359 if (Shift == 0) { 4360 if (RetVT == SrcVT) { 4361 unsigned ResultReg = createResultReg(RC); 4362 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4363 TII.get(TargetOpcode::COPY), ResultReg) 4364 .addReg(Op0, getKillRegState(Op0IsKill)); 4365 return ResultReg; 4366 } else 4367 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4368 } 4369 4370 // Don't deal with undefined shifts. 4371 if (Shift >= DstBits) 4372 return 0; 4373 4374 // For immediate shifts we can fold the zero-/sign-extension into the shift. 
4375 // {S|U}BFM Wd, Wn, #r, #s 4376 // Wd<s-r:0> = Wn<s:r> when r <= s 4377 4378 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4379 // %2 = ashr i16 %1, 4 4380 // Wd<7-4:0> = Wn<7:4> 4381 // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext 4382 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext 4383 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext 4384 4385 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4386 // %2 = ashr i16 %1, 8 4387 // Wd<7-7,0> = Wn<7:7> 4388 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext 4389 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4390 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4391 4392 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4393 // %2 = ashr i16 %1, 12 4394 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 4395 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext 4396 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4397 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4398 4399 if (Shift >= SrcBits && IsZExt) 4400 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT); 4401 4402 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift); 4403 unsigned ImmS = SrcBits - 1; 4404 static const unsigned OpcTable[2][2] = { 4405 {AArch64::SBFMWri, AArch64::SBFMXri}, 4406 {AArch64::UBFMWri, AArch64::UBFMXri} 4407 }; 4408 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4409 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4410 Register TmpReg = MRI.createVirtualRegister(RC); 4411 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4412 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4413 .addImm(0) 4414 .addReg(Op0, getKillRegState(Op0IsKill)) 4415 .addImm(AArch64::sub_32); 4416 Op0 = TmpReg; 4417 Op0IsKill = true; 4418 } 4419 return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS); 4420 } 4421 4422 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, 4423 bool IsZExt) { 4424 assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?"); 4425 4426 // FastISel does not have plumbing to deal with extensions where the SrcVT or 4427 // DestVT are odd things, so test to make sure that they are both types we can 4428 // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise 4429 // bail out to SelectionDAG. 4430 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) && 4431 (DestVT != MVT::i32) && (DestVT != MVT::i64)) || 4432 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) && 4433 (SrcVT != MVT::i16) && (SrcVT != MVT::i32))) 4434 return 0; 4435 4436 unsigned Opc; 4437 unsigned Imm = 0; 4438 4439 switch (SrcVT.SimpleTy) { 4440 default: 4441 return 0; 4442 case MVT::i1: 4443 return emiti1Ext(SrcReg, DestVT, IsZExt); 4444 case MVT::i8: 4445 if (DestVT == MVT::i64) 4446 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4447 else 4448 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri; 4449 Imm = 7; 4450 break; 4451 case MVT::i16: 4452 if (DestVT == MVT::i64) 4453 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4454 else 4455 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri; 4456 Imm = 15; 4457 break; 4458 case MVT::i32: 4459 assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?"); 4460 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4461 Imm = 31; 4462 break; 4463 } 4464 4465 // Handle i8 and i16 as i32. 
4466 if (DestVT == MVT::i8 || DestVT == MVT::i16) 4467 DestVT = MVT::i32; 4468 else if (DestVT == MVT::i64) { 4469 Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 4470 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4471 TII.get(AArch64::SUBREG_TO_REG), Src64) 4472 .addImm(0) 4473 .addReg(SrcReg) 4474 .addImm(AArch64::sub_32); 4475 SrcReg = Src64; 4476 } 4477 4478 const TargetRegisterClass *RC = 4479 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4480 return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm); 4481 } 4482 4483 static bool isZExtLoad(const MachineInstr *LI) { 4484 switch (LI->getOpcode()) { 4485 default: 4486 return false; 4487 case AArch64::LDURBBi: 4488 case AArch64::LDURHHi: 4489 case AArch64::LDURWi: 4490 case AArch64::LDRBBui: 4491 case AArch64::LDRHHui: 4492 case AArch64::LDRWui: 4493 case AArch64::LDRBBroX: 4494 case AArch64::LDRHHroX: 4495 case AArch64::LDRWroX: 4496 case AArch64::LDRBBroW: 4497 case AArch64::LDRHHroW: 4498 case AArch64::LDRWroW: 4499 return true; 4500 } 4501 } 4502 4503 static bool isSExtLoad(const MachineInstr *LI) { 4504 switch (LI->getOpcode()) { 4505 default: 4506 return false; 4507 case AArch64::LDURSBWi: 4508 case AArch64::LDURSHWi: 4509 case AArch64::LDURSBXi: 4510 case AArch64::LDURSHXi: 4511 case AArch64::LDURSWi: 4512 case AArch64::LDRSBWui: 4513 case AArch64::LDRSHWui: 4514 case AArch64::LDRSBXui: 4515 case AArch64::LDRSHXui: 4516 case AArch64::LDRSWui: 4517 case AArch64::LDRSBWroX: 4518 case AArch64::LDRSHWroX: 4519 case AArch64::LDRSBXroX: 4520 case AArch64::LDRSHXroX: 4521 case AArch64::LDRSWroX: 4522 case AArch64::LDRSBWroW: 4523 case AArch64::LDRSHWroW: 4524 case AArch64::LDRSBXroW: 4525 case AArch64::LDRSHXroW: 4526 case AArch64::LDRSWroW: 4527 return true; 4528 } 4529 } 4530 4531 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT, 4532 MVT SrcVT) { 4533 const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)); 4534 if (!LI || !LI->hasOneUse()) 4535 return false; 4536 4537 // Check if the load instruction has already been selected. 4538 unsigned Reg = lookUpRegForValue(LI); 4539 if (!Reg) 4540 return false; 4541 4542 MachineInstr *MI = MRI.getUniqueVRegDef(Reg); 4543 if (!MI) 4544 return false; 4545 4546 // Check if the correct load instruction has been emitted - SelectionDAG might 4547 // have emitted a zero-extending load, but we need a sign-extending load. 4548 bool IsZExt = isa<ZExtInst>(I); 4549 const auto *LoadMI = MI; 4550 if (LoadMI->getOpcode() == TargetOpcode::COPY && 4551 LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) { 4552 Register LoadReg = MI->getOperand(1).getReg(); 4553 LoadMI = MRI.getUniqueVRegDef(LoadReg); 4554 assert(LoadMI && "Expected valid instruction"); 4555 } 4556 if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI))) 4557 return false; 4558 4559 // Nothing to be done. 
4560   if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4561     updateValueMap(I, Reg);
4562     return true;
4563   }
4564 
4565   if (IsZExt) {
4566     unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
4567     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4568             TII.get(AArch64::SUBREG_TO_REG), Reg64)
4569         .addImm(0)
4570         .addReg(Reg, getKillRegState(true))
4571         .addImm(AArch64::sub_32);
4572     Reg = Reg64;
4573   } else {
4574     assert((MI->getOpcode() == TargetOpcode::COPY &&
4575             MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4576            "Expected copy instruction");
4577     Reg = MI->getOperand(1).getReg();
4578     MachineBasicBlock::iterator I(MI);
4579     removeDeadCode(I, std::next(I));
4580   }
4581   updateValueMap(I, Reg);
4582   return true;
4583 }
4584 
4585 bool AArch64FastISel::selectIntExt(const Instruction *I) {
4586   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4587          "Unexpected integer extend instruction.");
4588   MVT RetVT;
4589   MVT SrcVT;
4590   if (!isTypeSupported(I->getType(), RetVT))
4591     return false;
4592 
4593   if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4594     return false;
4595 
4596   // Try to optimize already sign-/zero-extended values from load instructions.
4597   if (optimizeIntExtLoad(I, RetVT, SrcVT))
4598     return true;
4599 
4600   unsigned SrcReg = getRegForValue(I->getOperand(0));
4601   if (!SrcReg)
4602     return false;
4603   bool SrcIsKill = hasTrivialKill(I->getOperand(0));
4604 
4605   // Try to optimize already sign-/zero-extended values from function arguments.
4606   bool IsZExt = isa<ZExtInst>(I);
4607   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4608     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4609       if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4610         unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
4611         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4612                 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4613             .addImm(0)
4614             .addReg(SrcReg, getKillRegState(SrcIsKill))
4615             .addImm(AArch64::sub_32);
4616         SrcReg = ResultReg;
4617       }
4618       // Conservatively clear all kill flags from all uses, because we are
4619       // replacing a sign-/zero-extend instruction at IR level with a nop at MI
4620       // level. The result of the instruction at IR level might have been
4621       // trivially dead, which is now no longer true.
4622       unsigned UseReg = lookUpRegForValue(I);
4623       if (UseReg)
4624         MRI.clearKillFlags(UseReg);
4625 
4626       updateValueMap(I, SrcReg);
4627       return true;
4628     }
4629   }
4630 
4631   unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4632   if (!ResultReg)
4633     return false;
4634 
4635   updateValueMap(I, ResultReg);
4636   return true;
4637 }
4638 
4639 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4640   EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4641   if (!DestEVT.isSimple())
4642     return false;
4643 
4644   MVT DestVT = DestEVT.getSimpleVT();
4645   if (DestVT != MVT::i64 && DestVT != MVT::i32)
4646     return false;
4647 
4648   unsigned DivOpc;
4649   bool Is64bit = (DestVT == MVT::i64);
4650   switch (ISDOpcode) {
4651   default:
4652     return false;
4653   case ISD::SREM:
4654     DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4655     break;
4656   case ISD::UREM:
4657     DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4658     break;
4659   }
4660   unsigned MSubOpc = Is64bit ?
AArch64::MSUBXrrr : AArch64::MSUBWrrr; 4661 unsigned Src0Reg = getRegForValue(I->getOperand(0)); 4662 if (!Src0Reg) 4663 return false; 4664 bool Src0IsKill = hasTrivialKill(I->getOperand(0)); 4665 4666 unsigned Src1Reg = getRegForValue(I->getOperand(1)); 4667 if (!Src1Reg) 4668 return false; 4669 bool Src1IsKill = hasTrivialKill(I->getOperand(1)); 4670 4671 const TargetRegisterClass *RC = 4672 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4673 unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false, 4674 Src1Reg, /*IsKill=*/false); 4675 assert(QuotReg && "Unexpected DIV instruction emission failure."); 4676 // The remainder is computed as numerator - (quotient * denominator) using the 4677 // MSUB instruction. 4678 unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true, 4679 Src1Reg, Src1IsKill, Src0Reg, 4680 Src0IsKill); 4681 updateValueMap(I, ResultReg); 4682 return true; 4683 } 4684 4685 bool AArch64FastISel::selectMul(const Instruction *I) { 4686 MVT VT; 4687 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 4688 return false; 4689 4690 if (VT.isVector()) 4691 return selectBinaryOp(I, ISD::MUL); 4692 4693 const Value *Src0 = I->getOperand(0); 4694 const Value *Src1 = I->getOperand(1); 4695 if (const auto *C = dyn_cast<ConstantInt>(Src0)) 4696 if (C->getValue().isPowerOf2()) 4697 std::swap(Src0, Src1); 4698 4699 // Try to simplify to a shift instruction. 4700 if (const auto *C = dyn_cast<ConstantInt>(Src1)) 4701 if (C->getValue().isPowerOf2()) { 4702 uint64_t ShiftVal = C->getValue().logBase2(); 4703 MVT SrcVT = VT; 4704 bool IsZExt = true; 4705 if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) { 4706 if (!isIntExtFree(ZExt)) { 4707 MVT VT; 4708 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) { 4709 SrcVT = VT; 4710 IsZExt = true; 4711 Src0 = ZExt->getOperand(0); 4712 } 4713 } 4714 } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) { 4715 if (!isIntExtFree(SExt)) { 4716 MVT VT; 4717 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) { 4718 SrcVT = VT; 4719 IsZExt = false; 4720 Src0 = SExt->getOperand(0); 4721 } 4722 } 4723 } 4724 4725 unsigned Src0Reg = getRegForValue(Src0); 4726 if (!Src0Reg) 4727 return false; 4728 bool Src0IsKill = hasTrivialKill(Src0); 4729 4730 unsigned ResultReg = 4731 emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt); 4732 4733 if (ResultReg) { 4734 updateValueMap(I, ResultReg); 4735 return true; 4736 } 4737 } 4738 4739 unsigned Src0Reg = getRegForValue(I->getOperand(0)); 4740 if (!Src0Reg) 4741 return false; 4742 bool Src0IsKill = hasTrivialKill(I->getOperand(0)); 4743 4744 unsigned Src1Reg = getRegForValue(I->getOperand(1)); 4745 if (!Src1Reg) 4746 return false; 4747 bool Src1IsKill = hasTrivialKill(I->getOperand(1)); 4748 4749 unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill); 4750 4751 if (!ResultReg) 4752 return false; 4753 4754 updateValueMap(I, ResultReg); 4755 return true; 4756 } 4757 4758 bool AArch64FastISel::selectShift(const Instruction *I) { 4759 MVT RetVT; 4760 if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true)) 4761 return false; 4762 4763 if (RetVT.isVector()) 4764 return selectOperator(I, I->getOpcode()); 4765 4766 if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) { 4767 unsigned ResultReg = 0; 4768 uint64_t ShiftVal = C->getZExtValue(); 4769 MVT SrcVT = RetVT; 4770 bool IsZExt = I->getOpcode() != Instruction::AShr; 4771 const Value *Op0 = I->getOperand(0); 
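    // If the shifted value is itself a zero-/sign-extend from a narrower type,
    // try to fold that extension into the immediate shift (see emitLSL_ri,
    // emitLSR_ri and emitASR_ri above). E.g., as a sketch, "%x = zext i8 %b to
    // i32; shl i32 %x, 4" becomes a single UBFM (ubfiz w0, w1, #4, #8) instead
    // of a separate extend followed by a shift.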
4772 if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) { 4773 if (!isIntExtFree(ZExt)) { 4774 MVT TmpVT; 4775 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) { 4776 SrcVT = TmpVT; 4777 IsZExt = true; 4778 Op0 = ZExt->getOperand(0); 4779 } 4780 } 4781 } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) { 4782 if (!isIntExtFree(SExt)) { 4783 MVT TmpVT; 4784 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) { 4785 SrcVT = TmpVT; 4786 IsZExt = false; 4787 Op0 = SExt->getOperand(0); 4788 } 4789 } 4790 } 4791 4792 unsigned Op0Reg = getRegForValue(Op0); 4793 if (!Op0Reg) 4794 return false; 4795 bool Op0IsKill = hasTrivialKill(Op0); 4796 4797 switch (I->getOpcode()) { 4798 default: llvm_unreachable("Unexpected instruction."); 4799 case Instruction::Shl: 4800 ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt); 4801 break; 4802 case Instruction::AShr: 4803 ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt); 4804 break; 4805 case Instruction::LShr: 4806 ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt); 4807 break; 4808 } 4809 if (!ResultReg) 4810 return false; 4811 4812 updateValueMap(I, ResultReg); 4813 return true; 4814 } 4815 4816 unsigned Op0Reg = getRegForValue(I->getOperand(0)); 4817 if (!Op0Reg) 4818 return false; 4819 bool Op0IsKill = hasTrivialKill(I->getOperand(0)); 4820 4821 unsigned Op1Reg = getRegForValue(I->getOperand(1)); 4822 if (!Op1Reg) 4823 return false; 4824 bool Op1IsKill = hasTrivialKill(I->getOperand(1)); 4825 4826 unsigned ResultReg = 0; 4827 switch (I->getOpcode()) { 4828 default: llvm_unreachable("Unexpected instruction."); 4829 case Instruction::Shl: 4830 ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill); 4831 break; 4832 case Instruction::AShr: 4833 ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill); 4834 break; 4835 case Instruction::LShr: 4836 ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill); 4837 break; 4838 } 4839 4840 if (!ResultReg) 4841 return false; 4842 4843 updateValueMap(I, ResultReg); 4844 return true; 4845 } 4846 4847 bool AArch64FastISel::selectBitCast(const Instruction *I) { 4848 MVT RetVT, SrcVT; 4849 4850 if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT)) 4851 return false; 4852 if (!isTypeLegal(I->getType(), RetVT)) 4853 return false; 4854 4855 unsigned Opc; 4856 if (RetVT == MVT::f32 && SrcVT == MVT::i32) 4857 Opc = AArch64::FMOVWSr; 4858 else if (RetVT == MVT::f64 && SrcVT == MVT::i64) 4859 Opc = AArch64::FMOVXDr; 4860 else if (RetVT == MVT::i32 && SrcVT == MVT::f32) 4861 Opc = AArch64::FMOVSWr; 4862 else if (RetVT == MVT::i64 && SrcVT == MVT::f64) 4863 Opc = AArch64::FMOVDXr; 4864 else 4865 return false; 4866 4867 const TargetRegisterClass *RC = nullptr; 4868 switch (RetVT.SimpleTy) { 4869 default: llvm_unreachable("Unexpected value type."); 4870 case MVT::i32: RC = &AArch64::GPR32RegClass; break; 4871 case MVT::i64: RC = &AArch64::GPR64RegClass; break; 4872 case MVT::f32: RC = &AArch64::FPR32RegClass; break; 4873 case MVT::f64: RC = &AArch64::FPR64RegClass; break; 4874 } 4875 unsigned Op0Reg = getRegForValue(I->getOperand(0)); 4876 if (!Op0Reg) 4877 return false; 4878 bool Op0IsKill = hasTrivialKill(I->getOperand(0)); 4879 unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill); 4880 4881 if (!ResultReg) 4882 return false; 4883 4884 updateValueMap(I, ResultReg); 4885 return true; 4886 } 4887 4888 bool AArch64FastISel::selectFRem(const Instruction *I) { 4889 MVT RetVT; 
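  // AArch64 has no frem instruction, so this is lowered as a runtime library
  // call (typically fmodf/fmod, per RTLIB::REM_F32/REM_F64 below).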
4890 if (!isTypeLegal(I->getType(), RetVT)) 4891 return false; 4892 4893 RTLIB::Libcall LC; 4894 switch (RetVT.SimpleTy) { 4895 default: 4896 return false; 4897 case MVT::f32: 4898 LC = RTLIB::REM_F32; 4899 break; 4900 case MVT::f64: 4901 LC = RTLIB::REM_F64; 4902 break; 4903 } 4904 4905 ArgListTy Args; 4906 Args.reserve(I->getNumOperands()); 4907 4908 // Populate the argument list. 4909 for (auto &Arg : I->operands()) { 4910 ArgListEntry Entry; 4911 Entry.Val = Arg; 4912 Entry.Ty = Arg->getType(); 4913 Args.push_back(Entry); 4914 } 4915 4916 CallLoweringInfo CLI; 4917 MCContext &Ctx = MF->getContext(); 4918 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(), 4919 TLI.getLibcallName(LC), std::move(Args)); 4920 if (!lowerCallTo(CLI)) 4921 return false; 4922 updateValueMap(I, CLI.ResultReg); 4923 return true; 4924 } 4925 4926 bool AArch64FastISel::selectSDiv(const Instruction *I) { 4927 MVT VT; 4928 if (!isTypeLegal(I->getType(), VT)) 4929 return false; 4930 4931 if (!isa<ConstantInt>(I->getOperand(1))) 4932 return selectBinaryOp(I, ISD::SDIV); 4933 4934 const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue(); 4935 if ((VT != MVT::i32 && VT != MVT::i64) || !C || 4936 !(C.isPowerOf2() || (-C).isPowerOf2())) 4937 return selectBinaryOp(I, ISD::SDIV); 4938 4939 unsigned Lg2 = C.countTrailingZeros(); 4940 unsigned Src0Reg = getRegForValue(I->getOperand(0)); 4941 if (!Src0Reg) 4942 return false; 4943 bool Src0IsKill = hasTrivialKill(I->getOperand(0)); 4944 4945 if (cast<BinaryOperator>(I)->isExact()) { 4946 unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2); 4947 if (!ResultReg) 4948 return false; 4949 updateValueMap(I, ResultReg); 4950 return true; 4951 } 4952 4953 int64_t Pow2MinusOne = (1ULL << Lg2) - 1; 4954 unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne); 4955 if (!AddReg) 4956 return false; 4957 4958 // (Src0 < 0) ? Pow2 - 1 : 0; 4959 if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0)) 4960 return false; 4961 4962 unsigned SelectOpc; 4963 const TargetRegisterClass *RC; 4964 if (VT == MVT::i64) { 4965 SelectOpc = AArch64::CSELXr; 4966 RC = &AArch64::GPR64RegClass; 4967 } else { 4968 SelectOpc = AArch64::CSELWr; 4969 RC = &AArch64::GPR32RegClass; 4970 } 4971 unsigned SelectReg = 4972 fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg, 4973 Src0IsKill, AArch64CC::LT); 4974 if (!SelectReg) 4975 return false; 4976 4977 // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also 4978 // negate the result. 4979 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 4980 unsigned ResultReg; 4981 if (C.isNegative()) 4982 ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true, 4983 SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2); 4984 else 4985 ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2); 4986 4987 if (!ResultReg) 4988 return false; 4989 4990 updateValueMap(I, ResultReg); 4991 return true; 4992 } 4993 4994 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We 4995 /// have to duplicate it for AArch64, because otherwise we would fail during the 4996 /// sign-extend emission. 4997 std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) { 4998 unsigned IdxN = getRegForValue(Idx); 4999 if (IdxN == 0) 5000 // Unhandled operand. Halt "fast" selection and bail. 
5001     return std::pair<unsigned, bool>(0, false);
5002 
5003   bool IdxNIsKill = hasTrivialKill(Idx);
5004 
5005   // If the index is smaller or larger than intptr_t, truncate or extend it.
5006   MVT PtrVT = TLI.getPointerTy(DL);
5007   EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
5008   if (IdxVT.bitsLT(PtrVT)) {
5009     IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
5010     IdxNIsKill = true;
5011   } else if (IdxVT.bitsGT(PtrVT))
5012     llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
5013   return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
5014 }
5015 
5016 /// This is mostly a copy of the existing FastISel GEP code, but we have to
5017 /// duplicate it for AArch64, because otherwise we would bail out even for
5018 /// simple cases. This is because the standard fastEmit functions don't cover
5019 /// MUL at all and ADD is lowered very inefficiently.
5020 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
5021   if (Subtarget->isTargetILP32())
5022     return false;
5023 
5024   unsigned N = getRegForValue(I->getOperand(0));
5025   if (!N)
5026     return false;
5027   bool NIsKill = hasTrivialKill(I->getOperand(0));
5028 
5029   // Keep a running tab of the total offset to coalesce multiple N = N + Offset
5030   // into a single N = N + TotalOffset.
5031   uint64_t TotalOffs = 0;
5032   MVT VT = TLI.getPointerTy(DL);
5033   for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
5034        GTI != E; ++GTI) {
5035     const Value *Idx = GTI.getOperand();
5036     if (auto *StTy = GTI.getStructTypeOrNull()) {
5037       unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
5038       // N = N + Offset
5039       if (Field)
5040         TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
5041     } else {
5042       Type *Ty = GTI.getIndexedType();
5043 
5044       // If this is a constant subscript, handle it quickly.
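      // E.g., as a sketch, for "getelementptr i32, i32* %p, i64 3" the constant
      // index is folded into TotalOffs (3 * 4 bytes) and later emitted as a
      // single add of #12 rather than a separate mul and add.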
5045 if (const auto *CI = dyn_cast<ConstantInt>(Idx)) { 5046 if (CI->isZero()) 5047 continue; 5048 // N = N + Offset 5049 TotalOffs += 5050 DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue(); 5051 continue; 5052 } 5053 if (TotalOffs) { 5054 N = emitAdd_ri_(VT, N, NIsKill, TotalOffs); 5055 if (!N) 5056 return false; 5057 NIsKill = true; 5058 TotalOffs = 0; 5059 } 5060 5061 // N = N + Idx * ElementSize; 5062 uint64_t ElementSize = DL.getTypeAllocSize(Ty); 5063 std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx); 5064 unsigned IdxN = Pair.first; 5065 bool IdxNIsKill = Pair.second; 5066 if (!IdxN) 5067 return false; 5068 5069 if (ElementSize != 1) { 5070 unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize); 5071 if (!C) 5072 return false; 5073 IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true); 5074 if (!IdxN) 5075 return false; 5076 IdxNIsKill = true; 5077 } 5078 N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill); 5079 if (!N) 5080 return false; 5081 } 5082 } 5083 if (TotalOffs) { 5084 N = emitAdd_ri_(VT, N, NIsKill, TotalOffs); 5085 if (!N) 5086 return false; 5087 } 5088 updateValueMap(I, N); 5089 return true; 5090 } 5091 5092 bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) { 5093 assert(TM.getOptLevel() == CodeGenOpt::None && 5094 "cmpxchg survived AtomicExpand at optlevel > -O0"); 5095 5096 auto *RetPairTy = cast<StructType>(I->getType()); 5097 Type *RetTy = RetPairTy->getTypeAtIndex(0U); 5098 assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) && 5099 "cmpxchg has a non-i1 status result"); 5100 5101 MVT VT; 5102 if (!isTypeLegal(RetTy, VT)) 5103 return false; 5104 5105 const TargetRegisterClass *ResRC; 5106 unsigned Opc, CmpOpc; 5107 // This only supports i32/i64, because i8/i16 aren't legal, and the generic 5108 // extractvalue selection doesn't support that. 5109 if (VT == MVT::i32) { 5110 Opc = AArch64::CMP_SWAP_32; 5111 CmpOpc = AArch64::SUBSWrs; 5112 ResRC = &AArch64::GPR32RegClass; 5113 } else if (VT == MVT::i64) { 5114 Opc = AArch64::CMP_SWAP_64; 5115 CmpOpc = AArch64::SUBSXrs; 5116 ResRC = &AArch64::GPR64RegClass; 5117 } else { 5118 return false; 5119 } 5120 5121 const MCInstrDesc &II = TII.get(Opc); 5122 5123 const unsigned AddrReg = constrainOperandRegClass( 5124 II, getRegForValue(I->getPointerOperand()), II.getNumDefs()); 5125 const unsigned DesiredReg = constrainOperandRegClass( 5126 II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1); 5127 const unsigned NewReg = constrainOperandRegClass( 5128 II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2); 5129 5130 const unsigned ResultReg1 = createResultReg(ResRC); 5131 const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass); 5132 const unsigned ScratchReg = createResultReg(&AArch64::GPR32RegClass); 5133 5134 // FIXME: MachineMemOperand doesn't support cmpxchg yet. 5135 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) 5136 .addDef(ResultReg1) 5137 .addDef(ScratchReg) 5138 .addUse(AddrReg) 5139 .addUse(DesiredReg) 5140 .addUse(NewReg); 5141 5142 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc)) 5143 .addDef(VT == MVT::i32 ? 
AArch64::WZR : AArch64::XZR) 5144 .addUse(ResultReg1) 5145 .addUse(DesiredReg) 5146 .addImm(0); 5147 5148 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr)) 5149 .addDef(ResultReg2) 5150 .addUse(AArch64::WZR) 5151 .addUse(AArch64::WZR) 5152 .addImm(AArch64CC::NE); 5153 5154 assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers."); 5155 updateValueMap(I, ResultReg1, 2); 5156 return true; 5157 } 5158 5159 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) { 5160 switch (I->getOpcode()) { 5161 default: 5162 break; 5163 case Instruction::Add: 5164 case Instruction::Sub: 5165 return selectAddSub(I); 5166 case Instruction::Mul: 5167 return selectMul(I); 5168 case Instruction::SDiv: 5169 return selectSDiv(I); 5170 case Instruction::SRem: 5171 if (!selectBinaryOp(I, ISD::SREM)) 5172 return selectRem(I, ISD::SREM); 5173 return true; 5174 case Instruction::URem: 5175 if (!selectBinaryOp(I, ISD::UREM)) 5176 return selectRem(I, ISD::UREM); 5177 return true; 5178 case Instruction::Shl: 5179 case Instruction::LShr: 5180 case Instruction::AShr: 5181 return selectShift(I); 5182 case Instruction::And: 5183 case Instruction::Or: 5184 case Instruction::Xor: 5185 return selectLogicalOp(I); 5186 case Instruction::Br: 5187 return selectBranch(I); 5188 case Instruction::IndirectBr: 5189 return selectIndirectBr(I); 5190 case Instruction::BitCast: 5191 if (!FastISel::selectBitCast(I)) 5192 return selectBitCast(I); 5193 return true; 5194 case Instruction::FPToSI: 5195 if (!selectCast(I, ISD::FP_TO_SINT)) 5196 return selectFPToInt(I, /*Signed=*/true); 5197 return true; 5198 case Instruction::FPToUI: 5199 return selectFPToInt(I, /*Signed=*/false); 5200 case Instruction::ZExt: 5201 case Instruction::SExt: 5202 return selectIntExt(I); 5203 case Instruction::Trunc: 5204 if (!selectCast(I, ISD::TRUNCATE)) 5205 return selectTrunc(I); 5206 return true; 5207 case Instruction::FPExt: 5208 return selectFPExt(I); 5209 case Instruction::FPTrunc: 5210 return selectFPTrunc(I); 5211 case Instruction::SIToFP: 5212 if (!selectCast(I, ISD::SINT_TO_FP)) 5213 return selectIntToFP(I, /*Signed=*/true); 5214 return true; 5215 case Instruction::UIToFP: 5216 return selectIntToFP(I, /*Signed=*/false); 5217 case Instruction::Load: 5218 return selectLoad(I); 5219 case Instruction::Store: 5220 return selectStore(I); 5221 case Instruction::FCmp: 5222 case Instruction::ICmp: 5223 return selectCmp(I); 5224 case Instruction::Select: 5225 return selectSelect(I); 5226 case Instruction::Ret: 5227 return selectRet(I); 5228 case Instruction::FRem: 5229 return selectFRem(I); 5230 case Instruction::GetElementPtr: 5231 return selectGetElementPtr(I); 5232 case Instruction::AtomicCmpXchg: 5233 return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I)); 5234 } 5235 5236 // fall-back to target-independent instruction selection. 5237 return selectOperator(I, I->getOpcode()); 5238 } 5239 5240 namespace llvm { 5241 5242 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo, 5243 const TargetLibraryInfo *LibInfo) { 5244 return new AArch64FastISel(FuncInfo, LibInfo); 5245 } 5246 5247 } // end namespace llvm 5248