1 //===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file implements the targeting of the InstructionSelector class for
10 /// AArch64.
11 /// \todo This should be generated by TableGen.
12 //===----------------------------------------------------------------------===//
13
14 #include "AArch64InstrInfo.h"
15 #include "AArch64MachineFunctionInfo.h"
16 #include "AArch64RegisterBankInfo.h"
17 #include "AArch64RegisterInfo.h"
18 #include "AArch64Subtarget.h"
19 #include "AArch64TargetMachine.h"
20 #include "MCTargetDesc/AArch64AddressingModes.h"
21 #include "llvm/ADT/Optional.h"
22 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
23 #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
24 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
25 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
26 #include "llvm/CodeGen/GlobalISel/Utils.h"
27 #include "llvm/CodeGen/MachineBasicBlock.h"
28 #include "llvm/CodeGen/MachineConstantPool.h"
29 #include "llvm/CodeGen/MachineFunction.h"
30 #include "llvm/CodeGen/MachineInstr.h"
31 #include "llvm/CodeGen/MachineInstrBuilder.h"
32 #include "llvm/CodeGen/MachineOperand.h"
33 #include "llvm/CodeGen/MachineRegisterInfo.h"
34 #include "llvm/CodeGen/TargetOpcodes.h"
35 #include "llvm/IR/Constants.h"
36 #include "llvm/IR/Type.h"
37 #include "llvm/IR/IntrinsicsAArch64.h"
38 #include "llvm/Support/Debug.h"
39 #include "llvm/Support/raw_ostream.h"
40
41 #define DEBUG_TYPE "aarch64-isel"
42
43 using namespace llvm;
44
45 namespace {
46
47 #define GET_GLOBALISEL_PREDICATE_BITSET
48 #include "AArch64GenGlobalISel.inc"
49 #undef GET_GLOBALISEL_PREDICATE_BITSET
50
51 class AArch64InstructionSelector : public InstructionSelector {
52 public:
53 AArch64InstructionSelector(const AArch64TargetMachine &TM,
54 const AArch64Subtarget &STI,
55 const AArch64RegisterBankInfo &RBI);
56
57 bool select(MachineInstr &I) override;
getName()58 static const char *getName() { return DEBUG_TYPE; }
59
setupMF(MachineFunction & MF,GISelKnownBits & KB,CodeGenCoverage & CoverageInfo)60 void setupMF(MachineFunction &MF, GISelKnownBits &KB,
61 CodeGenCoverage &CoverageInfo) override {
62 InstructionSelector::setupMF(MF, KB, CoverageInfo);
63
64 // hasFnAttribute() is expensive to call on every BRCOND selection, so
65 // cache it here for each run of the selector.
66 ProduceNonFlagSettingCondBr =
67 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
68 MFReturnAddr = Register();
69
70 processPHIs(MF);
71 }
72
73 private:
74 /// tblgen-erated 'select' implementation, used as the initial selector for
75 /// the patterns that don't require complex C++.
76 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
77
78 // A lowering phase that runs before any selection attempts.
79 // Returns true if the instruction was modified.
80 bool preISelLower(MachineInstr &I);
81
82 // An early selection function that runs before the selectImpl() call.
83 bool earlySelect(MachineInstr &I) const;
84
85 // Do some preprocessing of G_PHIs before we begin selection.
86 void processPHIs(MachineFunction &MF);
87
88 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
89
90 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
91 bool contractCrossBankCopyIntoStore(MachineInstr &I,
92 MachineRegisterInfo &MRI);
93
94 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
95
96 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
97 MachineRegisterInfo &MRI) const;
98 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
99 MachineRegisterInfo &MRI) const;
100
101 bool tryOptAndIntoCompareBranch(MachineInstr *LHS,
102 int64_t CmpConstant,
103 const CmpInst::Predicate &Pred,
104 MachineBasicBlock *DstMBB,
105 MachineIRBuilder &MIB) const;
106 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
107 MachineRegisterInfo &MRI) const;
108
109 bool selectVectorASHR(MachineInstr &I, MachineRegisterInfo &MRI) const;
110 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
111
112 // Helper to generate an equivalent of scalar_to_vector into a new register,
113 // returned via 'Dst'.
114 MachineInstr *emitScalarToVector(unsigned EltSize,
115 const TargetRegisterClass *DstRC,
116 Register Scalar,
117 MachineIRBuilder &MIRBuilder) const;
118
119 /// Emit a lane insert into \p DstReg, or a new vector register if None is
120 /// provided.
121 ///
122 /// The lane inserted into is defined by \p LaneIdx. The vector source
123 /// register is given by \p SrcReg. The register containing the element is
124 /// given by \p EltReg.
125 MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
126 Register EltReg, unsigned LaneIdx,
127 const RegisterBank &RB,
128 MachineIRBuilder &MIRBuilder) const;
129 bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
130 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
131 MachineRegisterInfo &MRI) const;
132 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
133 bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
134 bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
135
136 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
137 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
138 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
139 bool selectSplitVectorUnmerge(MachineInstr &I,
140 MachineRegisterInfo &MRI) const;
141 bool selectIntrinsicWithSideEffects(MachineInstr &I,
142 MachineRegisterInfo &MRI) const;
143 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
144 bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
145 bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
146 bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
147 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
148 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
149 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const;
150
151 unsigned emitConstantPoolEntry(const Constant *CPVal,
152 MachineFunction &MF) const;
153 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
154 MachineIRBuilder &MIRBuilder) const;
155
156 // Emit a vector concat operation.
157 MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
158 Register Op2,
159 MachineIRBuilder &MIRBuilder) const;
160
161 // Emit an integer compare between LHS and RHS, which checks for Predicate.
162 //
163 // This returns the produced compare instruction, and the predicate which
164 // was ultimately used in the compare. The predicate may differ from what
165 // is passed in \p Predicate due to optimization.
166 std::pair<MachineInstr *, CmpInst::Predicate>
167 emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
168 MachineOperand &Predicate,
169 MachineIRBuilder &MIRBuilder) const;
170 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS, MachineOperand &RHS,
171 MachineIRBuilder &MIRBuilder) const;
172 MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
173 MachineIRBuilder &MIRBuilder) const;
174 MachineInstr *emitTST(const Register &LHS, const Register &RHS,
175 MachineIRBuilder &MIRBuilder) const;
176 MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
177 const RegisterBank &DstRB, LLT ScalarTy,
178 Register VecReg, unsigned LaneIdx,
179 MachineIRBuilder &MIRBuilder) const;
180
181 /// Helper function for selecting G_FCONSTANT. If the G_FCONSTANT can be
182 /// materialized using a FMOV instruction, then update MI and return it.
183 /// Otherwise, do nothing and return a nullptr.
184 MachineInstr *emitFMovForFConstant(MachineInstr &MI,
185 MachineRegisterInfo &MRI) const;
186
187 /// Emit a CSet for a compare.
188 MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
189 MachineIRBuilder &MIRBuilder) const;
190
191 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
192 /// \p IsNegative is true if the test should be "not zero".
193 /// This will also optimize the test bit instruction when possible.
194 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
195 MachineBasicBlock *DstMBB,
196 MachineIRBuilder &MIB) const;
197
198 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
199 // We use these manually instead of using the importer since it doesn't
200 // support SDNodeXForm.
201 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
202 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
203 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
204 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
205
206 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
207 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
208 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
209
210 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
211 unsigned Size) const;
212
selectAddrModeUnscaled8(MachineOperand & Root) const213 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
214 return selectAddrModeUnscaled(Root, 1);
215 }
selectAddrModeUnscaled16(MachineOperand & Root) const216 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
217 return selectAddrModeUnscaled(Root, 2);
218 }
selectAddrModeUnscaled32(MachineOperand & Root) const219 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
220 return selectAddrModeUnscaled(Root, 4);
221 }
selectAddrModeUnscaled64(MachineOperand & Root) const222 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
223 return selectAddrModeUnscaled(Root, 8);
224 }
selectAddrModeUnscaled128(MachineOperand & Root) const225 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
226 return selectAddrModeUnscaled(Root, 16);
227 }
228
229 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
230 /// from complex pattern matchers like selectAddrModeIndexed().
231 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
232 MachineRegisterInfo &MRI) const;
233
234 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
235 unsigned Size) const;
236 template <int Width>
selectAddrModeIndexed(MachineOperand & Root) const237 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
238 return selectAddrModeIndexed(Root, Width / 8);
239 }
240
241 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
242 const MachineRegisterInfo &MRI) const;
243 ComplexRendererFns
244 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
245 unsigned SizeInBytes) const;
246
247 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
248 /// or not a shift + extend should be folded into an addressing mode. Returns
249 /// None when this is not profitable or possible.
250 ComplexRendererFns
251 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
252 MachineOperand &Offset, unsigned SizeInBytes,
253 bool WantsExt) const;
254 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
255 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
256 unsigned SizeInBytes) const;
257 template <int Width>
selectAddrModeXRO(MachineOperand & Root) const258 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
259 return selectAddrModeXRO(Root, Width / 8);
260 }
261
262 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
263 unsigned SizeInBytes) const;
264 template <int Width>
selectAddrModeWRO(MachineOperand & Root) const265 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
266 return selectAddrModeWRO(Root, Width / 8);
267 }
268
269 ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const;
270
selectArithShiftedRegister(MachineOperand & Root) const271 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
272 return selectShiftedRegister(Root);
273 }
274
selectLogicalShiftedRegister(MachineOperand & Root) const275 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
276 // TODO: selectShiftedRegister should allow for rotates on logical shifts.
277 // For now, make them the same. The only difference between the two is that
278 // logical shifts are allowed to fold in rotates. Otherwise, these are
279 // functionally the same.
280 return selectShiftedRegister(Root);
281 }
282
283 /// Given an extend instruction, determine the correct shift-extend type for
284 /// that instruction.
285 ///
286 /// If the instruction is going to be used in a load or store, pass
287 /// \p IsLoadStore = true.
288 AArch64_AM::ShiftExtendType
289 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
290 bool IsLoadStore = false) const;
291
292 /// Instructions that accept extend modifiers like UXTW expect the register
293 /// being extended to be a GPR32. Narrow ExtReg to a 32-bit register using a
294 /// subregister copy if necessary. Return either ExtReg, or the result of the
295 /// new copy.
296 Register narrowExtendRegIfNeeded(Register ExtReg,
297 MachineIRBuilder &MIB) const;
298 Register widenGPRBankRegIfNeeded(Register Reg, unsigned Size,
299 MachineIRBuilder &MIB) const;
300 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
301
302 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
303 int OpIdx = -1) const;
304 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
305 int OpIdx = -1) const;
306 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
307 int OpIdx = -1) const;
308
309 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
310 void materializeLargeCMVal(MachineInstr &I, const Value *V,
311 unsigned OpFlags) const;
312
313 // Optimization methods.
314 bool tryOptSelect(MachineInstr &MI) const;
315 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
316 MachineOperand &Predicate,
317 MachineIRBuilder &MIRBuilder) const;
318 MachineInstr *tryOptArithImmedIntegerCompare(MachineOperand &LHS,
319 MachineOperand &RHS,
320 CmpInst::Predicate &Predicate,
321 MachineIRBuilder &MIB) const;
322 MachineInstr *tryOptArithShiftedCompare(MachineOperand &LHS,
323 MachineOperand &RHS,
324 MachineIRBuilder &MIB) const;
325
326 /// Return true if \p MI is a load or store of \p NumBytes bytes.
327 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
328
329 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
330 /// register zeroed out. In other words, the result of MI has been explicitly
331 /// zero extended.
332 bool isDef32(const MachineInstr &MI) const;
333
334 const AArch64TargetMachine &TM;
335 const AArch64Subtarget &STI;
336 const AArch64InstrInfo &TII;
337 const AArch64RegisterInfo &TRI;
338 const AArch64RegisterBankInfo &RBI;
339
340 bool ProduceNonFlagSettingCondBr = false;
341
342 // Some cached values used during selection.
343 // We use LR as a live-in register, and we keep track of it here as it can be
344 // clobbered by calls.
345 Register MFReturnAddr;
346
347 #define GET_GLOBALISEL_PREDICATES_DECL
348 #include "AArch64GenGlobalISel.inc"
349 #undef GET_GLOBALISEL_PREDICATES_DECL
350
351 // We declare the temporaries used by selectImpl() in the class to minimize the
352 // cost of constructing placeholder values.
353 #define GET_GLOBALISEL_TEMPORARIES_DECL
354 #include "AArch64GenGlobalISel.inc"
355 #undef GET_GLOBALISEL_TEMPORARIES_DECL
356 };
357
358 } // end anonymous namespace
359
360 #define GET_GLOBALISEL_IMPL
361 #include "AArch64GenGlobalISel.inc"
362 #undef GET_GLOBALISEL_IMPL
363
AArch64InstructionSelector(const AArch64TargetMachine & TM,const AArch64Subtarget & STI,const AArch64RegisterBankInfo & RBI)364 AArch64InstructionSelector::AArch64InstructionSelector(
365 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
366 const AArch64RegisterBankInfo &RBI)
367 : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
368 TRI(*STI.getRegisterInfo()), RBI(RBI),
369 #define GET_GLOBALISEL_PREDICATES_INIT
370 #include "AArch64GenGlobalISel.inc"
371 #undef GET_GLOBALISEL_PREDICATES_INIT
372 #define GET_GLOBALISEL_TEMPORARIES_INIT
373 #include "AArch64GenGlobalISel.inc"
374 #undef GET_GLOBALISEL_TEMPORARIES_INIT
375 {
376 }
377
378 // FIXME: This should be target-independent, inferred from the types declared
379 // for each class in the bank.
380 static const TargetRegisterClass *
getRegClassForTypeOnBank(LLT Ty,const RegisterBank & RB,const RegisterBankInfo & RBI,bool GetAllRegSet=false)381 getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
382 const RegisterBankInfo &RBI,
383 bool GetAllRegSet = false) {
384 if (RB.getID() == AArch64::GPRRegBankID) {
385 if (Ty.getSizeInBits() <= 32)
386 return GetAllRegSet ? &AArch64::GPR32allRegClass
387 : &AArch64::GPR32RegClass;
388 if (Ty.getSizeInBits() == 64)
389 return GetAllRegSet ? &AArch64::GPR64allRegClass
390 : &AArch64::GPR64RegClass;
391 return nullptr;
392 }
393
394 if (RB.getID() == AArch64::FPRRegBankID) {
395 if (Ty.getSizeInBits() <= 16)
396 return &AArch64::FPR16RegClass;
397 if (Ty.getSizeInBits() == 32)
398 return &AArch64::FPR32RegClass;
399 if (Ty.getSizeInBits() == 64)
400 return &AArch64::FPR64RegClass;
401 if (Ty.getSizeInBits() == 128)
402 return &AArch64::FPR128RegClass;
403 return nullptr;
404 }
405
406 return nullptr;
407 }
408
409 /// Given a register bank, and size in bits, return the smallest register class
410 /// that can represent that combination.
411 static const TargetRegisterClass *
getMinClassForRegBank(const RegisterBank & RB,unsigned SizeInBits,bool GetAllRegSet=false)412 getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
413 bool GetAllRegSet = false) {
414 unsigned RegBankID = RB.getID();
415
416 if (RegBankID == AArch64::GPRRegBankID) {
417 if (SizeInBits <= 32)
418 return GetAllRegSet ? &AArch64::GPR32allRegClass
419 : &AArch64::GPR32RegClass;
420 if (SizeInBits == 64)
421 return GetAllRegSet ? &AArch64::GPR64allRegClass
422 : &AArch64::GPR64RegClass;
423 }
424
425 if (RegBankID == AArch64::FPRRegBankID) {
426 switch (SizeInBits) {
427 default:
428 return nullptr;
429 case 8:
430 return &AArch64::FPR8RegClass;
431 case 16:
432 return &AArch64::FPR16RegClass;
433 case 32:
434 return &AArch64::FPR32RegClass;
435 case 64:
436 return &AArch64::FPR64RegClass;
437 case 128:
438 return &AArch64::FPR128RegClass;
439 }
440 }
441
442 return nullptr;
443 }
444
445 /// Returns the correct subregister to use for a given register class.
getSubRegForClass(const TargetRegisterClass * RC,const TargetRegisterInfo & TRI,unsigned & SubReg)446 static bool getSubRegForClass(const TargetRegisterClass *RC,
447 const TargetRegisterInfo &TRI, unsigned &SubReg) {
448 switch (TRI.getRegSizeInBits(*RC)) {
449 case 8:
450 SubReg = AArch64::bsub;
451 break;
452 case 16:
453 SubReg = AArch64::hsub;
454 break;
455 case 32:
456 if (RC != &AArch64::FPR32RegClass)
457 SubReg = AArch64::sub_32;
458 else
459 SubReg = AArch64::ssub;
460 break;
461 case 64:
462 SubReg = AArch64::dsub;
463 break;
464 default:
465 LLVM_DEBUG(
466 dbgs() << "Couldn't find appropriate subregister for register class.");
467 return false;
468 }
469
470 return true;
471 }
472
473 /// Returns the minimum size the given register bank can hold.
getMinSizeForRegBank(const RegisterBank & RB)474 static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
475 switch (RB.getID()) {
476 case AArch64::GPRRegBankID:
477 return 32;
478 case AArch64::FPRRegBankID:
479 return 8;
480 default:
481 llvm_unreachable("Tried to get minimum size for unknown register bank.");
482 }
483 }
484
getImmedFromMO(const MachineOperand & Root)485 static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
486 auto &MI = *Root.getParent();
487 auto &MBB = *MI.getParent();
488 auto &MF = *MBB.getParent();
489 auto &MRI = MF.getRegInfo();
490 uint64_t Immed;
491 if (Root.isImm())
492 Immed = Root.getImm();
493 else if (Root.isCImm())
494 Immed = Root.getCImm()->getZExtValue();
495 else if (Root.isReg()) {
496 auto ValAndVReg =
497 getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
498 if (!ValAndVReg)
499 return None;
500 Immed = ValAndVReg->Value;
501 } else
502 return None;
503 return Immed;
504 }
505
506 /// Check whether \p I is a currently unsupported binary operation:
507 /// - it has an unsized type
508 /// - an operand is not a vreg
509 /// - all operands are not in the same bank
510 /// These are checks that should someday live in the verifier, but right now,
511 /// these are mostly limitations of the aarch64 selector.
unsupportedBinOp(const MachineInstr & I,const AArch64RegisterBankInfo & RBI,const MachineRegisterInfo & MRI,const AArch64RegisterInfo & TRI)512 static bool unsupportedBinOp(const MachineInstr &I,
513 const AArch64RegisterBankInfo &RBI,
514 const MachineRegisterInfo &MRI,
515 const AArch64RegisterInfo &TRI) {
516 LLT Ty = MRI.getType(I.getOperand(0).getReg());
517 if (!Ty.isValid()) {
518 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
519 return true;
520 }
521
522 const RegisterBank *PrevOpBank = nullptr;
523 for (auto &MO : I.operands()) {
524 // FIXME: Support non-register operands.
525 if (!MO.isReg()) {
526 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
527 return true;
528 }
529
530 // FIXME: Can generic operations have physical registers operands? If
531 // so, this will need to be taught about that, and we'll need to get the
532 // bank out of the minimal class for the register.
533 // Either way, this needs to be documented (and possibly verified).
534 if (!Register::isVirtualRegister(MO.getReg())) {
535 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
536 return true;
537 }
538
539 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
540 if (!OpBank) {
541 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
542 return true;
543 }
544
545 if (PrevOpBank && OpBank != PrevOpBank) {
546 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
547 return true;
548 }
549 PrevOpBank = OpBank;
550 }
551 return false;
552 }
553
554 /// Select the AArch64 opcode for the basic binary operation \p GenericOpc
555 /// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
556 /// and of size \p OpSize.
557 /// \returns \p GenericOpc if the combination is unsupported.
selectBinaryOp(unsigned GenericOpc,unsigned RegBankID,unsigned OpSize)558 static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
559 unsigned OpSize) {
560 switch (RegBankID) {
561 case AArch64::GPRRegBankID:
562 if (OpSize == 32) {
563 switch (GenericOpc) {
564 case TargetOpcode::G_SHL:
565 return AArch64::LSLVWr;
566 case TargetOpcode::G_LSHR:
567 return AArch64::LSRVWr;
568 case TargetOpcode::G_ASHR:
569 return AArch64::ASRVWr;
570 default:
571 return GenericOpc;
572 }
573 } else if (OpSize == 64) {
574 switch (GenericOpc) {
575 case TargetOpcode::G_PTR_ADD:
576 return AArch64::ADDXrr;
577 case TargetOpcode::G_SHL:
578 return AArch64::LSLVXr;
579 case TargetOpcode::G_LSHR:
580 return AArch64::LSRVXr;
581 case TargetOpcode::G_ASHR:
582 return AArch64::ASRVXr;
583 default:
584 return GenericOpc;
585 }
586 }
587 break;
588 case AArch64::FPRRegBankID:
589 switch (OpSize) {
590 case 32:
591 switch (GenericOpc) {
592 case TargetOpcode::G_FADD:
593 return AArch64::FADDSrr;
594 case TargetOpcode::G_FSUB:
595 return AArch64::FSUBSrr;
596 case TargetOpcode::G_FMUL:
597 return AArch64::FMULSrr;
598 case TargetOpcode::G_FDIV:
599 return AArch64::FDIVSrr;
600 default:
601 return GenericOpc;
602 }
603 case 64:
604 switch (GenericOpc) {
605 case TargetOpcode::G_FADD:
606 return AArch64::FADDDrr;
607 case TargetOpcode::G_FSUB:
608 return AArch64::FSUBDrr;
609 case TargetOpcode::G_FMUL:
610 return AArch64::FMULDrr;
611 case TargetOpcode::G_FDIV:
612 return AArch64::FDIVDrr;
613 case TargetOpcode::G_OR:
614 return AArch64::ORRv8i8;
615 default:
616 return GenericOpc;
617 }
618 }
619 break;
620 }
621 return GenericOpc;
622 }
623
624 /// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
625 /// appropriate for the (value) register bank \p RegBankID and of memory access
626 /// size \p OpSize. This returns the variant with the base+unsigned-immediate
627 /// addressing mode (e.g., LDRXui).
628 /// \returns \p GenericOpc if the combination is unsupported.
selectLoadStoreUIOp(unsigned GenericOpc,unsigned RegBankID,unsigned OpSize)629 static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
630 unsigned OpSize) {
631 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
632 switch (RegBankID) {
633 case AArch64::GPRRegBankID:
634 switch (OpSize) {
635 case 8:
636 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
637 case 16:
638 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
639 case 32:
640 return isStore ? AArch64::STRWui : AArch64::LDRWui;
641 case 64:
642 return isStore ? AArch64::STRXui : AArch64::LDRXui;
643 }
644 break;
645 case AArch64::FPRRegBankID:
646 switch (OpSize) {
647 case 8:
648 return isStore ? AArch64::STRBui : AArch64::LDRBui;
649 case 16:
650 return isStore ? AArch64::STRHui : AArch64::LDRHui;
651 case 32:
652 return isStore ? AArch64::STRSui : AArch64::LDRSui;
653 case 64:
654 return isStore ? AArch64::STRDui : AArch64::LDRDui;
655 }
656 break;
657 }
658 return GenericOpc;
659 }
660
661 #ifndef NDEBUG
662 /// Helper function that verifies that we have a valid copy at the end of
663 /// selectCopy. Verifies that the source and dest have the expected sizes and
664 /// then returns true.
isValidCopy(const MachineInstr & I,const RegisterBank & DstBank,const MachineRegisterInfo & MRI,const TargetRegisterInfo & TRI,const RegisterBankInfo & RBI)665 static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
666 const MachineRegisterInfo &MRI,
667 const TargetRegisterInfo &TRI,
668 const RegisterBankInfo &RBI) {
669 const Register DstReg = I.getOperand(0).getReg();
670 const Register SrcReg = I.getOperand(1).getReg();
671 const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
672 const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
673
674 // Make sure the size of the source and dest line up.
675 assert(
676 (DstSize == SrcSize ||
677 // Copies are a mean to setup initial types, the number of
678 // bits may not exactly match.
679 (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
680 // Copies are a mean to copy bits around, as long as we are
681 // on the same register class, that's fine. Otherwise, that
682 // means we need some SUBREG_TO_REG or AND & co.
683 (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
684 "Copy with different width?!");
685
686 // Check the size of the destination.
687 assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
688 "GPRs cannot get more than 64-bit width values");
689
690 return true;
691 }
692 #endif
693
694 /// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
695 /// to \p *To.
696 ///
697 /// E.g "To = COPY SrcReg:SubReg"
copySubReg(MachineInstr & I,MachineRegisterInfo & MRI,const RegisterBankInfo & RBI,Register SrcReg,const TargetRegisterClass * To,unsigned SubReg)698 static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
699 const RegisterBankInfo &RBI, Register SrcReg,
700 const TargetRegisterClass *To, unsigned SubReg) {
701 assert(SrcReg.isValid() && "Expected a valid source register?");
702 assert(To && "Destination register class cannot be null");
703 assert(SubReg && "Expected a valid subregister");
704
705 MachineIRBuilder MIB(I);
706 auto SubRegCopy =
707 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
708 MachineOperand &RegOp = I.getOperand(1);
709 RegOp.setReg(SubRegCopy.getReg(0));
710
711 // It's possible that the destination register won't be constrained. Make
712 // sure that happens.
713 if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
714 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
715
716 return true;
717 }
718
719 /// Helper function to get the source and destination register classes for a
720 /// copy. Returns a std::pair containing the source register class for the
721 /// copy, and the destination register class for the copy. If a register class
722 /// cannot be determined, then it will be nullptr.
723 static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
getRegClassesForCopy(MachineInstr & I,const TargetInstrInfo & TII,MachineRegisterInfo & MRI,const TargetRegisterInfo & TRI,const RegisterBankInfo & RBI)724 getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
725 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
726 const RegisterBankInfo &RBI) {
727 Register DstReg = I.getOperand(0).getReg();
728 Register SrcReg = I.getOperand(1).getReg();
729 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
730 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
731 unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
732 unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
733
734 // Special casing for cross-bank copies of s1s. We can technically represent
735 // a 1-bit value with any size of register. The minimum size for a GPR is 32
736 // bits. So, we need to put the FPR on 32 bits as well.
737 //
738 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
739 // then we can pull it into the helpers that get the appropriate class for a
740 // register bank. Or make a new helper that carries along some constraint
741 // information.
742 if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
743 SrcSize = DstSize = 32;
744
745 return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
746 getMinClassForRegBank(DstRegBank, DstSize, true)};
747 }
748
selectCopy(MachineInstr & I,const TargetInstrInfo & TII,MachineRegisterInfo & MRI,const TargetRegisterInfo & TRI,const RegisterBankInfo & RBI)749 static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
750 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
751 const RegisterBankInfo &RBI) {
752 Register DstReg = I.getOperand(0).getReg();
753 Register SrcReg = I.getOperand(1).getReg();
754 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
755 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
756
757 // Find the correct register classes for the source and destination registers.
758 const TargetRegisterClass *SrcRC;
759 const TargetRegisterClass *DstRC;
760 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
761
762 if (!DstRC) {
763 LLVM_DEBUG(dbgs() << "Unexpected dest size "
764 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
765 return false;
766 }
767
768 // A couple helpers below, for making sure that the copy we produce is valid.
769
770 // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
771 // to verify that the src and dst are the same size, since that's handled by
772 // the SUBREG_TO_REG.
773 bool KnownValid = false;
774
775 // Returns true, or asserts if something we don't expect happens. Instead of
776 // returning true, we return isValidCopy() to ensure that we verify the
777 // result.
778 auto CheckCopy = [&]() {
779 // If we have a bitcast or something, we can't have physical registers.
780 assert((I.isCopy() ||
781 (!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
782 !Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
783 "No phys reg on generic operator!");
784 bool ValidCopy = true;
785 #ifndef NDEBUG
786 ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI);
787 assert(ValidCopy && "Invalid copy.");
788 #endif
789 return ValidCopy;
790 };
791
792 // Is this a copy? If so, then we may need to insert a subregister copy.
793 if (I.isCopy()) {
794 // Yes. Check if there's anything to fix up.
795 if (!SrcRC) {
796 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
797 return false;
798 }
799
800 unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
801 unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
802 unsigned SubReg;
803
804 // If the source bank doesn't support a subregister copy small enough,
805 // then we first need to copy to the destination bank.
806 if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
807 const TargetRegisterClass *DstTempRC =
808 getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
809 getSubRegForClass(DstRC, TRI, SubReg);
810
811 MachineIRBuilder MIB(I);
812 auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
813 copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
814 } else if (SrcSize > DstSize) {
815 // If the source register is bigger than the destination we need to
816 // perform a subregister copy.
817 const TargetRegisterClass *SubRegRC =
818 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
819 getSubRegForClass(SubRegRC, TRI, SubReg);
820 copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
821 } else if (DstSize > SrcSize) {
822 // If the destination register is bigger than the source we need to do
823 // a promotion using SUBREG_TO_REG.
824 const TargetRegisterClass *PromotionRC =
825 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
826 getSubRegForClass(SrcRC, TRI, SubReg);
827
828 Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
829 BuildMI(*I.getParent(), I, I.getDebugLoc(),
830 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
831 .addImm(0)
832 .addUse(SrcReg)
833 .addImm(SubReg);
834 MachineOperand &RegOp = I.getOperand(1);
835 RegOp.setReg(PromoteReg);
836
837 // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
838 KnownValid = true;
839 }
840
841 // If the destination is a physical register, then there's nothing to
842 // change, so we're done.
843 if (Register::isPhysicalRegister(DstReg))
844 return CheckCopy();
845 }
846
847 // No need to constrain SrcReg. It will get constrained when we hit another
848 // of its use or its defs. Copies do not have constraints.
849 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
850 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
851 << " operand\n");
852 return false;
853 }
854 I.setDesc(TII.get(AArch64::COPY));
855 return CheckCopy();
856 }
857
selectFPConvOpc(unsigned GenericOpc,LLT DstTy,LLT SrcTy)858 static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
859 if (!DstTy.isScalar() || !SrcTy.isScalar())
860 return GenericOpc;
861
862 const unsigned DstSize = DstTy.getSizeInBits();
863 const unsigned SrcSize = SrcTy.getSizeInBits();
864
865 switch (DstSize) {
866 case 32:
867 switch (SrcSize) {
868 case 32:
869 switch (GenericOpc) {
870 case TargetOpcode::G_SITOFP:
871 return AArch64::SCVTFUWSri;
872 case TargetOpcode::G_UITOFP:
873 return AArch64::UCVTFUWSri;
874 case TargetOpcode::G_FPTOSI:
875 return AArch64::FCVTZSUWSr;
876 case TargetOpcode::G_FPTOUI:
877 return AArch64::FCVTZUUWSr;
878 default:
879 return GenericOpc;
880 }
881 case 64:
882 switch (GenericOpc) {
883 case TargetOpcode::G_SITOFP:
884 return AArch64::SCVTFUXSri;
885 case TargetOpcode::G_UITOFP:
886 return AArch64::UCVTFUXSri;
887 case TargetOpcode::G_FPTOSI:
888 return AArch64::FCVTZSUWDr;
889 case TargetOpcode::G_FPTOUI:
890 return AArch64::FCVTZUUWDr;
891 default:
892 return GenericOpc;
893 }
894 default:
895 return GenericOpc;
896 }
897 case 64:
898 switch (SrcSize) {
899 case 32:
900 switch (GenericOpc) {
901 case TargetOpcode::G_SITOFP:
902 return AArch64::SCVTFUWDri;
903 case TargetOpcode::G_UITOFP:
904 return AArch64::UCVTFUWDri;
905 case TargetOpcode::G_FPTOSI:
906 return AArch64::FCVTZSUXSr;
907 case TargetOpcode::G_FPTOUI:
908 return AArch64::FCVTZUUXSr;
909 default:
910 return GenericOpc;
911 }
912 case 64:
913 switch (GenericOpc) {
914 case TargetOpcode::G_SITOFP:
915 return AArch64::SCVTFUXDri;
916 case TargetOpcode::G_UITOFP:
917 return AArch64::UCVTFUXDri;
918 case TargetOpcode::G_FPTOSI:
919 return AArch64::FCVTZSUXDr;
920 case TargetOpcode::G_FPTOUI:
921 return AArch64::FCVTZUUXDr;
922 default:
923 return GenericOpc;
924 }
925 default:
926 return GenericOpc;
927 }
928 default:
929 return GenericOpc;
930 };
931 return GenericOpc;
932 }
933
selectSelectOpc(MachineInstr & I,MachineRegisterInfo & MRI,const RegisterBankInfo & RBI)934 static unsigned selectSelectOpc(MachineInstr &I, MachineRegisterInfo &MRI,
935 const RegisterBankInfo &RBI) {
936 const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
937 bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
938 AArch64::GPRRegBankID);
939 LLT Ty = MRI.getType(I.getOperand(0).getReg());
940 if (Ty == LLT::scalar(32))
941 return IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr;
942 else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64))
943 return IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr;
944 return 0;
945 }
946
947 /// Helper function to select the opcode for a G_FCMP.
selectFCMPOpc(MachineInstr & I,MachineRegisterInfo & MRI)948 static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) {
949 // If this is a compare against +0.0, then we don't have to explicitly
950 // materialize a constant.
951 const ConstantFP *FPImm = getConstantFPVRegVal(I.getOperand(3).getReg(), MRI);
952 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
953 unsigned OpSize = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
954 if (OpSize != 32 && OpSize != 64)
955 return 0;
956 unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
957 {AArch64::FCMPSri, AArch64::FCMPDri}};
958 return CmpOpcTbl[ShouldUseImm][OpSize == 64];
959 }
960
961 /// Returns true if \p P is an unsigned integer comparison predicate.
isUnsignedICMPPred(const CmpInst::Predicate P)962 static bool isUnsignedICMPPred(const CmpInst::Predicate P) {
963 switch (P) {
964 default:
965 return false;
966 case CmpInst::ICMP_UGT:
967 case CmpInst::ICMP_UGE:
968 case CmpInst::ICMP_ULT:
969 case CmpInst::ICMP_ULE:
970 return true;
971 }
972 }
973
changeICMPPredToAArch64CC(CmpInst::Predicate P)974 static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
975 switch (P) {
976 default:
977 llvm_unreachable("Unknown condition code!");
978 case CmpInst::ICMP_NE:
979 return AArch64CC::NE;
980 case CmpInst::ICMP_EQ:
981 return AArch64CC::EQ;
982 case CmpInst::ICMP_SGT:
983 return AArch64CC::GT;
984 case CmpInst::ICMP_SGE:
985 return AArch64CC::GE;
986 case CmpInst::ICMP_SLT:
987 return AArch64CC::LT;
988 case CmpInst::ICMP_SLE:
989 return AArch64CC::LE;
990 case CmpInst::ICMP_UGT:
991 return AArch64CC::HI;
992 case CmpInst::ICMP_UGE:
993 return AArch64CC::HS;
994 case CmpInst::ICMP_ULT:
995 return AArch64CC::LO;
996 case CmpInst::ICMP_ULE:
997 return AArch64CC::LS;
998 }
999 }
1000
changeFCMPPredToAArch64CC(CmpInst::Predicate P,AArch64CC::CondCode & CondCode,AArch64CC::CondCode & CondCode2)1001 static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
1002 AArch64CC::CondCode &CondCode,
1003 AArch64CC::CondCode &CondCode2) {
1004 CondCode2 = AArch64CC::AL;
1005 switch (P) {
1006 default:
1007 llvm_unreachable("Unknown FP condition!");
1008 case CmpInst::FCMP_OEQ:
1009 CondCode = AArch64CC::EQ;
1010 break;
1011 case CmpInst::FCMP_OGT:
1012 CondCode = AArch64CC::GT;
1013 break;
1014 case CmpInst::FCMP_OGE:
1015 CondCode = AArch64CC::GE;
1016 break;
1017 case CmpInst::FCMP_OLT:
1018 CondCode = AArch64CC::MI;
1019 break;
1020 case CmpInst::FCMP_OLE:
1021 CondCode = AArch64CC::LS;
1022 break;
1023 case CmpInst::FCMP_ONE:
1024 CondCode = AArch64CC::MI;
1025 CondCode2 = AArch64CC::GT;
1026 break;
1027 case CmpInst::FCMP_ORD:
1028 CondCode = AArch64CC::VC;
1029 break;
1030 case CmpInst::FCMP_UNO:
1031 CondCode = AArch64CC::VS;
1032 break;
1033 case CmpInst::FCMP_UEQ:
1034 CondCode = AArch64CC::EQ;
1035 CondCode2 = AArch64CC::VS;
1036 break;
1037 case CmpInst::FCMP_UGT:
1038 CondCode = AArch64CC::HI;
1039 break;
1040 case CmpInst::FCMP_UGE:
1041 CondCode = AArch64CC::PL;
1042 break;
1043 case CmpInst::FCMP_ULT:
1044 CondCode = AArch64CC::LT;
1045 break;
1046 case CmpInst::FCMP_ULE:
1047 CondCode = AArch64CC::LE;
1048 break;
1049 case CmpInst::FCMP_UNE:
1050 CondCode = AArch64CC::NE;
1051 break;
1052 }
1053 }
1054
1055 /// Return a register which can be used as a bit to test in a TB(N)Z.
getTestBitReg(Register Reg,uint64_t & Bit,bool & Invert,MachineRegisterInfo & MRI)1056 static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1057 MachineRegisterInfo &MRI) {
1058 assert(Reg.isValid() && "Expected valid register!");
1059 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1060 unsigned Opc = MI->getOpcode();
1061
1062 if (!MI->getOperand(0).isReg() ||
1063 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1064 break;
1065
1066 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1067 //
1068 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1069 // on the truncated x is the same as the bit number on x.
1070 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1071 Opc == TargetOpcode::G_TRUNC) {
1072 Register NextReg = MI->getOperand(1).getReg();
1073 // Did we find something worth folding?
1074 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1075 break;
1076
1077 // NextReg is worth folding. Keep looking.
1078 Reg = NextReg;
1079 continue;
1080 }
1081
1082 // Attempt to find a suitable operation with a constant on one side.
1083 Optional<uint64_t> C;
1084 Register TestReg;
1085 switch (Opc) {
1086 default:
1087 break;
1088 case TargetOpcode::G_AND:
1089 case TargetOpcode::G_XOR: {
1090 TestReg = MI->getOperand(1).getReg();
1091 Register ConstantReg = MI->getOperand(2).getReg();
1092 auto VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
1093 if (!VRegAndVal) {
1094 // AND commutes, check the other side for a constant.
1095 // FIXME: Can we canonicalize the constant so that it's always on the
1096 // same side at some point earlier?
1097 std::swap(ConstantReg, TestReg);
1098 VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
1099 }
1100 if (VRegAndVal)
1101 C = VRegAndVal->Value;
1102 break;
1103 }
1104 case TargetOpcode::G_ASHR:
1105 case TargetOpcode::G_LSHR:
1106 case TargetOpcode::G_SHL: {
1107 TestReg = MI->getOperand(1).getReg();
1108 auto VRegAndVal =
1109 getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1110 if (VRegAndVal)
1111 C = VRegAndVal->Value;
1112 break;
1113 }
1114 }
1115
1116 // Didn't find a constant or viable register. Bail out of the loop.
1117 if (!C || !TestReg.isValid())
1118 break;
1119
1120 // We found a suitable instruction with a constant. Check to see if we can
1121 // walk through the instruction.
1122 Register NextReg;
1123 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1124 switch (Opc) {
1125 default:
1126 break;
1127 case TargetOpcode::G_AND:
1128 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1129 if ((*C >> Bit) & 1)
1130 NextReg = TestReg;
1131 break;
1132 case TargetOpcode::G_SHL:
1133 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1134 // the type of the register.
1135 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1136 NextReg = TestReg;
1137 Bit = Bit - *C;
1138 }
1139 break;
1140 case TargetOpcode::G_ASHR:
1141 // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1142 // in x
1143 NextReg = TestReg;
1144 Bit = Bit + *C;
1145 if (Bit >= TestRegSize)
1146 Bit = TestRegSize - 1;
1147 break;
1148 case TargetOpcode::G_LSHR:
1149 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1150 if ((Bit + *C) < TestRegSize) {
1151 NextReg = TestReg;
1152 Bit = Bit + *C;
1153 }
1154 break;
1155 case TargetOpcode::G_XOR:
1156 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1157 // appropriate.
1158 //
1159 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1160 //
1161 // tbz x', b -> tbnz x, b
1162 //
1163 // Because x' only has the b-th bit set if x does not.
1164 if ((*C >> Bit) & 1)
1165 Invert = !Invert;
1166 NextReg = TestReg;
1167 break;
1168 }
1169
1170 // Check if we found anything worth folding.
1171 if (!NextReg.isValid())
1172 return Reg;
1173 Reg = NextReg;
1174 }
1175
1176 return Reg;
1177 }
1178
emitTestBit(Register TestReg,uint64_t Bit,bool IsNegative,MachineBasicBlock * DstMBB,MachineIRBuilder & MIB) const1179 MachineInstr *AArch64InstructionSelector::emitTestBit(
1180 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1181 MachineIRBuilder &MIB) const {
1182 assert(TestReg.isValid());
1183 assert(ProduceNonFlagSettingCondBr &&
1184 "Cannot emit TB(N)Z with speculation tracking!");
1185 MachineRegisterInfo &MRI = *MIB.getMRI();
1186
1187 // Attempt to optimize the test bit by walking over instructions.
1188 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1189 LLT Ty = MRI.getType(TestReg);
1190 unsigned Size = Ty.getSizeInBits();
1191 assert(!Ty.isVector() && "Expected a scalar!");
1192 assert(Bit < 64 && "Bit is too large!");
1193
1194 // When the test register is a 64-bit register, we have to narrow to make
1195 // TBNZW work.
1196 bool UseWReg = Bit < 32;
1197 unsigned NecessarySize = UseWReg ? 32 : 64;
1198 if (Size < NecessarySize)
1199 TestReg = widenGPRBankRegIfNeeded(TestReg, NecessarySize, MIB);
1200 else if (Size > NecessarySize)
1201 TestReg = narrowExtendRegIfNeeded(TestReg, MIB);
1202
1203 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1204 {AArch64::TBZW, AArch64::TBNZW}};
1205 unsigned Opc = OpcTable[UseWReg][IsNegative];
1206 auto TestBitMI =
1207 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1208 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1209 return &*TestBitMI;
1210 }
1211
tryOptAndIntoCompareBranch(MachineInstr * AndInst,int64_t CmpConstant,const CmpInst::Predicate & Pred,MachineBasicBlock * DstMBB,MachineIRBuilder & MIB) const1212 bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1213 MachineInstr *AndInst, int64_t CmpConstant, const CmpInst::Predicate &Pred,
1214 MachineBasicBlock *DstMBB, MachineIRBuilder &MIB) const {
1215 // Given something like this:
1216 //
1217 // %x = ...Something...
1218 // %one = G_CONSTANT i64 1
1219 // %zero = G_CONSTANT i64 0
1220 // %and = G_AND %x, %one
1221 // %cmp = G_ICMP intpred(ne), %and, %zero
1222 // %cmp_trunc = G_TRUNC %cmp
1223 // G_BRCOND %cmp_trunc, %bb.3
1224 //
1225 // We want to try and fold the AND into the G_BRCOND and produce either a
1226 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1227 //
1228 // In this case, we'd get
1229 //
1230 // TBNZ %x %bb.3
1231 //
1232 if (!AndInst || AndInst->getOpcode() != TargetOpcode::G_AND)
1233 return false;
1234
1235 // Need to be comparing against 0 to fold.
1236 if (CmpConstant != 0)
1237 return false;
1238
1239 MachineRegisterInfo &MRI = *MIB.getMRI();
1240
1241 // Only support EQ and NE. If we have LT, then it *is* possible to fold, but
1242 // we don't want to do this. When we have an AND and LT, we need a TST/ANDS,
1243 // so folding would be redundant.
1244 if (Pred != CmpInst::Predicate::ICMP_EQ &&
1245 Pred != CmpInst::Predicate::ICMP_NE)
1246 return false;
1247
1248 // Check if the AND has a constant on its RHS which we can use as a mask.
1249 // If it's a power of 2, then it's the same as checking a specific bit.
1250 // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1251 auto MaybeBit =
1252 getConstantVRegValWithLookThrough(AndInst->getOperand(2).getReg(), MRI);
1253 if (!MaybeBit || !isPowerOf2_64(MaybeBit->Value))
1254 return false;
1255
1256 uint64_t Bit = Log2_64(static_cast<uint64_t>(MaybeBit->Value));
1257 Register TestReg = AndInst->getOperand(1).getReg();
1258 bool Invert = Pred == CmpInst::Predicate::ICMP_NE;
1259
1260 // Emit a TB(N)Z.
1261 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1262 return true;
1263 }
1264
selectCompareBranch(MachineInstr & I,MachineFunction & MF,MachineRegisterInfo & MRI) const1265 bool AArch64InstructionSelector::selectCompareBranch(
1266 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1267
1268 const Register CondReg = I.getOperand(0).getReg();
1269 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1270 MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1271 if (CCMI->getOpcode() == TargetOpcode::G_TRUNC)
1272 CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg());
1273 if (CCMI->getOpcode() != TargetOpcode::G_ICMP)
1274 return false;
1275
1276 Register LHS = CCMI->getOperand(2).getReg();
1277 Register RHS = CCMI->getOperand(3).getReg();
1278 auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
1279 MachineIRBuilder MIB(I);
1280 CmpInst::Predicate Pred =
1281 (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
1282 MachineInstr *LHSMI = getDefIgnoringCopies(LHS, MRI);
1283
1284 // When we can emit a TB(N)Z, prefer that.
1285 //
1286 // Handle non-commutative condition codes first.
1287 // Note that we don't want to do this when we have a G_AND because it can
1288 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1289 if (VRegAndVal && LHSMI->getOpcode() != TargetOpcode::G_AND) {
1290 int64_t C = VRegAndVal->Value;
1291
1292 // When we have a greater-than comparison, we can just test if the msb is
1293 // zero.
1294 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1295 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1296 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1297 I.eraseFromParent();
1298 return true;
1299 }
1300
1301 // When we have a less than comparison, we can just test if the msb is not
1302 // zero.
1303 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1304 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1305 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1306 I.eraseFromParent();
1307 return true;
1308 }
1309 }
1310
1311 if (!VRegAndVal) {
1312 std::swap(RHS, LHS);
1313 VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
1314 LHSMI = getDefIgnoringCopies(LHS, MRI);
1315 }
1316
1317 if (!VRegAndVal || VRegAndVal->Value != 0) {
1318 // If we can't select a CBZ then emit a cmp + Bcc.
1319 MachineInstr *Cmp;
1320 std::tie(Cmp, Pred) = emitIntegerCompare(
1321 CCMI->getOperand(2), CCMI->getOperand(3), CCMI->getOperand(1), MIB);
1322 if (!Cmp)
1323 return false;
1324 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(Pred);
1325 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1326 I.eraseFromParent();
1327 return true;
1328 }
1329
1330 // Try to emit a TB(N)Z for an eq or ne condition.
1331 if (tryOptAndIntoCompareBranch(LHSMI, VRegAndVal->Value, Pred, DestMBB,
1332 MIB)) {
1333 I.eraseFromParent();
1334 return true;
1335 }
1336
1337 const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
1338 if (RB.getID() != AArch64::GPRRegBankID)
1339 return false;
1340 if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
1341 return false;
1342
1343 const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
1344 unsigned CBOpc = 0;
1345 if (CmpWidth <= 32)
1346 CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
1347 else if (CmpWidth == 64)
1348 CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
1349 else
1350 return false;
1351
1352 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
1353 .addUse(LHS)
1354 .addMBB(DestMBB)
1355 .constrainAllUses(TII, TRI, RBI);
1356
1357 I.eraseFromParent();
1358 return true;
1359 }
1360
1361 /// Returns the element immediate value of a vector shift operand if found.
1362 /// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
getVectorShiftImm(Register Reg,MachineRegisterInfo & MRI)1363 static Optional<int64_t> getVectorShiftImm(Register Reg,
1364 MachineRegisterInfo &MRI) {
1365 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1366 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1367 assert(OpMI && "Expected to find a vreg def for vector shift operand");
1368 if (OpMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR)
1369 return None;
1370
1371 // Check all operands are identical immediates.
1372 int64_t ImmVal = 0;
1373 for (unsigned Idx = 1; Idx < OpMI->getNumOperands(); ++Idx) {
1374 auto VRegAndVal = getConstantVRegValWithLookThrough(OpMI->getOperand(Idx).getReg(), MRI);
1375 if (!VRegAndVal)
1376 return None;
1377
1378 if (Idx == 1)
1379 ImmVal = VRegAndVal->Value;
1380 if (ImmVal != VRegAndVal->Value)
1381 return None;
1382 }
1383
1384 return ImmVal;
1385 }
1386
1387 /// Matches and returns the shift immediate value for a SHL instruction given
1388 /// a shift operand.
getVectorSHLImm(LLT SrcTy,Register Reg,MachineRegisterInfo & MRI)1389 static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) {
1390 Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1391 if (!ShiftImm)
1392 return None;
1393 // Check the immediate is in range for a SHL.
1394 int64_t Imm = *ShiftImm;
1395 if (Imm < 0)
1396 return None;
1397 switch (SrcTy.getElementType().getSizeInBits()) {
1398 default:
1399 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1400 return None;
1401 case 8:
1402 if (Imm > 7)
1403 return None;
1404 break;
1405 case 16:
1406 if (Imm > 15)
1407 return None;
1408 break;
1409 case 32:
1410 if (Imm > 31)
1411 return None;
1412 break;
1413 case 64:
1414 if (Imm > 63)
1415 return None;
1416 break;
1417 }
1418 return Imm;
1419 }
1420
selectVectorSHL(MachineInstr & I,MachineRegisterInfo & MRI) const1421 bool AArch64InstructionSelector::selectVectorSHL(
1422 MachineInstr &I, MachineRegisterInfo &MRI) const {
1423 assert(I.getOpcode() == TargetOpcode::G_SHL);
1424 Register DstReg = I.getOperand(0).getReg();
1425 const LLT Ty = MRI.getType(DstReg);
1426 Register Src1Reg = I.getOperand(1).getReg();
1427 Register Src2Reg = I.getOperand(2).getReg();
1428
1429 if (!Ty.isVector())
1430 return false;
1431
1432 // Check if we have a vector of constants on RHS that we can select as the
1433 // immediate form.
1434 Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1435
1436 unsigned Opc = 0;
1437 if (Ty == LLT::vector(2, 64)) {
1438 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1439 } else if (Ty == LLT::vector(4, 32)) {
1440 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1441 } else if (Ty == LLT::vector(2, 32)) {
1442 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1443 } else {
1444 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1445 return false;
1446 }
1447
1448 MachineIRBuilder MIB(I);
1449 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1450 if (ImmVal)
1451 Shl.addImm(*ImmVal);
1452 else
1453 Shl.addUse(Src2Reg);
1454 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1455 I.eraseFromParent();
1456 return true;
1457 }
1458
selectVectorASHR(MachineInstr & I,MachineRegisterInfo & MRI) const1459 bool AArch64InstructionSelector::selectVectorASHR(
1460 MachineInstr &I, MachineRegisterInfo &MRI) const {
1461 assert(I.getOpcode() == TargetOpcode::G_ASHR);
1462 Register DstReg = I.getOperand(0).getReg();
1463 const LLT Ty = MRI.getType(DstReg);
1464 Register Src1Reg = I.getOperand(1).getReg();
1465 Register Src2Reg = I.getOperand(2).getReg();
1466
1467 if (!Ty.isVector())
1468 return false;
1469
1470 // There is not a shift right register instruction, but the shift left
1471 // register instruction takes a signed value, where negative numbers specify a
1472 // right shift.
1473
1474 unsigned Opc = 0;
1475 unsigned NegOpc = 0;
1476 const TargetRegisterClass *RC = nullptr;
1477 if (Ty == LLT::vector(2, 64)) {
1478 Opc = AArch64::SSHLv2i64;
1479 NegOpc = AArch64::NEGv2i64;
1480 RC = &AArch64::FPR128RegClass;
1481 } else if (Ty == LLT::vector(4, 32)) {
1482 Opc = AArch64::SSHLv4i32;
1483 NegOpc = AArch64::NEGv4i32;
1484 RC = &AArch64::FPR128RegClass;
1485 } else if (Ty == LLT::vector(2, 32)) {
1486 Opc = AArch64::SSHLv2i32;
1487 NegOpc = AArch64::NEGv2i32;
1488 RC = &AArch64::FPR64RegClass;
1489 } else {
1490 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1491 return false;
1492 }
1493
1494 MachineIRBuilder MIB(I);
1495 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1496 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1497 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1498 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1499 I.eraseFromParent();
1500 return true;
1501 }
1502
selectVaStartAAPCS(MachineInstr & I,MachineFunction & MF,MachineRegisterInfo & MRI) const1503 bool AArch64InstructionSelector::selectVaStartAAPCS(
1504 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1505 return false;
1506 }
1507
selectVaStartDarwin(MachineInstr & I,MachineFunction & MF,MachineRegisterInfo & MRI) const1508 bool AArch64InstructionSelector::selectVaStartDarwin(
1509 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1510 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1511 Register ListReg = I.getOperand(0).getReg();
1512
1513 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1514
1515 auto MIB =
1516 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
1517 .addDef(ArgsAddrReg)
1518 .addFrameIndex(FuncInfo->getVarArgsStackIndex())
1519 .addImm(0)
1520 .addImm(0);
1521
1522 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1523
1524 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
1525 .addUse(ArgsAddrReg)
1526 .addUse(ListReg)
1527 .addImm(0)
1528 .addMemOperand(*I.memoperands_begin());
1529
1530 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1531 I.eraseFromParent();
1532 return true;
1533 }
1534
materializeLargeCMVal(MachineInstr & I,const Value * V,unsigned OpFlags) const1535 void AArch64InstructionSelector::materializeLargeCMVal(
1536 MachineInstr &I, const Value *V, unsigned OpFlags) const {
1537 MachineBasicBlock &MBB = *I.getParent();
1538 MachineFunction &MF = *MBB.getParent();
1539 MachineRegisterInfo &MRI = MF.getRegInfo();
1540 MachineIRBuilder MIB(I);
1541
1542 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
1543 MovZ->addOperand(MF, I.getOperand(1));
1544 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
1545 AArch64II::MO_NC);
1546 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
1547 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
1548
1549 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
1550 Register ForceDstReg) {
1551 Register DstReg = ForceDstReg
1552 ? ForceDstReg
1553 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1554 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
1555 if (auto *GV = dyn_cast<GlobalValue>(V)) {
1556 MovI->addOperand(MF, MachineOperand::CreateGA(
1557 GV, MovZ->getOperand(1).getOffset(), Flags));
1558 } else {
1559 MovI->addOperand(
1560 MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
1561 MovZ->getOperand(1).getOffset(), Flags));
1562 }
1563 MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
1564 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
1565 return DstReg;
1566 };
1567 Register DstReg = BuildMovK(MovZ.getReg(0),
1568 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
1569 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
1570 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
1571 return;
1572 }
1573
preISelLower(MachineInstr & I)1574 bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
1575 MachineBasicBlock &MBB = *I.getParent();
1576 MachineFunction &MF = *MBB.getParent();
1577 MachineRegisterInfo &MRI = MF.getRegInfo();
1578
1579 switch (I.getOpcode()) {
1580 case TargetOpcode::G_SHL:
1581 case TargetOpcode::G_ASHR:
1582 case TargetOpcode::G_LSHR: {
1583 // These shifts are legalized to have 64 bit shift amounts because we want
1584 // to take advantage of the existing imported selection patterns that assume
1585 // the immediates are s64s. However, if the shifted type is 32 bits and for
1586 // some reason we receive input GMIR that has an s64 shift amount that's not
1587 // a G_CONSTANT, insert a truncate so that we can still select the s32
1588 // register-register variant.
1589 Register SrcReg = I.getOperand(1).getReg();
1590 Register ShiftReg = I.getOperand(2).getReg();
1591 const LLT ShiftTy = MRI.getType(ShiftReg);
1592 const LLT SrcTy = MRI.getType(SrcReg);
1593 if (SrcTy.isVector())
1594 return false;
1595 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
1596 if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
1597 return false;
1598 auto *AmtMI = MRI.getVRegDef(ShiftReg);
1599 assert(AmtMI && "could not find a vreg definition for shift amount");
1600 if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
1601 // Insert a subregister copy to implement a 64->32 trunc
1602 MachineIRBuilder MIB(I);
1603 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
1604 .addReg(ShiftReg, 0, AArch64::sub_32);
1605 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1606 I.getOperand(2).setReg(Trunc.getReg(0));
1607 }
1608 return true;
1609 }
1610 case TargetOpcode::G_STORE:
1611 return contractCrossBankCopyIntoStore(I, MRI);
1612 case TargetOpcode::G_PTR_ADD:
1613 return convertPtrAddToAdd(I, MRI);
1614 case TargetOpcode::G_LOAD: {
1615 // For scalar loads of pointers, we try to convert the dest type from p0
1616 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
1617 // conversion, this should be ok because all users should have been
1618 // selected already, so the type doesn't matter for them.
1619 Register DstReg = I.getOperand(0).getReg();
1620 const LLT DstTy = MRI.getType(DstReg);
1621 if (!DstTy.isPointer())
1622 return false;
1623 MRI.setType(DstReg, LLT::scalar(64));
1624 return true;
1625 }
1626 default:
1627 return false;
1628 }
1629 }
1630
1631 /// This lowering tries to look for G_PTR_ADD instructions and then converts
1632 /// them to a standard G_ADD with a COPY on the source.
1633 ///
1634 /// The motivation behind this is to expose the add semantics to the imported
1635 /// tablegen patterns. We shouldn't need to check for uses being loads/stores,
1636 /// because the selector works bottom up, uses before defs. By the time we
1637 /// end up trying to select a G_PTR_ADD, we should have already attempted to
1638 /// fold this into addressing modes and were therefore unsuccessful.
convertPtrAddToAdd(MachineInstr & I,MachineRegisterInfo & MRI)1639 bool AArch64InstructionSelector::convertPtrAddToAdd(
1640 MachineInstr &I, MachineRegisterInfo &MRI) {
1641 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1642 Register DstReg = I.getOperand(0).getReg();
1643 Register AddOp1Reg = I.getOperand(1).getReg();
1644 const LLT PtrTy = MRI.getType(DstReg);
1645 if (PtrTy.getAddressSpace() != 0)
1646 return false;
1647
1648 MachineIRBuilder MIB(I);
1649 const LLT CastPtrTy = PtrTy.isVector() ? LLT::vector(2, 64) : LLT::scalar(64);
1650 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
1651 // Set regbanks on the registers.
1652 if (PtrTy.isVector())
1653 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
1654 else
1655 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1656
1657 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
1658 // %dst(intty) = G_ADD %intbase, off
1659 I.setDesc(TII.get(TargetOpcode::G_ADD));
1660 MRI.setType(DstReg, CastPtrTy);
1661 I.getOperand(1).setReg(PtrToInt.getReg(0));
1662 if (!select(*PtrToInt)) {
1663 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
1664 return false;
1665 }
1666 return true;
1667 }
1668
earlySelectSHL(MachineInstr & I,MachineRegisterInfo & MRI) const1669 bool AArch64InstructionSelector::earlySelectSHL(
1670 MachineInstr &I, MachineRegisterInfo &MRI) const {
1671 // We try to match the immediate variant of LSL, which is actually an alias
1672 // for a special case of UBFM. Otherwise, we fall back to the imported
1673 // selector which will match the register variant.
1674 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
1675 const auto &MO = I.getOperand(2);
1676 auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI);
1677 if (!VRegAndVal)
1678 return false;
1679
1680 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1681 if (DstTy.isVector())
1682 return false;
1683 bool Is64Bit = DstTy.getSizeInBits() == 64;
1684 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
1685 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
1686 MachineIRBuilder MIB(I);
1687
1688 if (!Imm1Fn || !Imm2Fn)
1689 return false;
1690
1691 auto NewI =
1692 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
1693 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
1694
1695 for (auto &RenderFn : *Imm1Fn)
1696 RenderFn(NewI);
1697 for (auto &RenderFn : *Imm2Fn)
1698 RenderFn(NewI);
1699
1700 I.eraseFromParent();
1701 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
1702 }
1703
contractCrossBankCopyIntoStore(MachineInstr & I,MachineRegisterInfo & MRI)1704 bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
1705 MachineInstr &I, MachineRegisterInfo &MRI) {
1706 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
1707 // If we're storing a scalar, it doesn't matter what register bank that
1708 // scalar is on. All that matters is the size.
1709 //
1710 // So, if we see something like this (with a 32-bit scalar as an example):
1711 //
1712 // %x:gpr(s32) = ... something ...
1713 // %y:fpr(s32) = COPY %x:gpr(s32)
1714 // G_STORE %y:fpr(s32)
1715 //
1716 // We can fix this up into something like this:
1717 //
1718 // G_STORE %x:gpr(s32)
1719 //
1720 // And then continue the selection process normally.
1721 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
1722 if (!DefDstReg.isValid())
1723 return false;
1724 LLT DefDstTy = MRI.getType(DefDstReg);
1725 Register StoreSrcReg = I.getOperand(0).getReg();
1726 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
1727
1728 // If we get something strange like a physical register, then we shouldn't
1729 // go any further.
1730 if (!DefDstTy.isValid())
1731 return false;
1732
1733 // Are the source and dst types the same size?
1734 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
1735 return false;
1736
1737 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
1738 RBI.getRegBank(DefDstReg, MRI, TRI))
1739 return false;
1740
1741 // We have a cross-bank copy, which is entering a store. Let's fold it.
1742 I.getOperand(0).setReg(DefDstReg);
1743 return true;
1744 }
1745
earlySelect(MachineInstr & I) const1746 bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
1747 assert(I.getParent() && "Instruction should be in a basic block!");
1748 assert(I.getParent()->getParent() && "Instruction should be in a function!");
1749
1750 MachineBasicBlock &MBB = *I.getParent();
1751 MachineFunction &MF = *MBB.getParent();
1752 MachineRegisterInfo &MRI = MF.getRegInfo();
1753
1754 switch (I.getOpcode()) {
1755 case TargetOpcode::G_SHL:
1756 return earlySelectSHL(I, MRI);
1757 case TargetOpcode::G_CONSTANT: {
1758 bool IsZero = false;
1759 if (I.getOperand(1).isCImm())
1760 IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
1761 else if (I.getOperand(1).isImm())
1762 IsZero = I.getOperand(1).getImm() == 0;
1763
1764 if (!IsZero)
1765 return false;
1766
1767 Register DefReg = I.getOperand(0).getReg();
1768 LLT Ty = MRI.getType(DefReg);
1769 if (Ty.getSizeInBits() == 64) {
1770 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
1771 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
1772 } else if (Ty.getSizeInBits() == 32) {
1773 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
1774 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
1775 } else
1776 return false;
1777
1778 I.setDesc(TII.get(TargetOpcode::COPY));
1779 return true;
1780 }
1781 default:
1782 return false;
1783 }
1784 }
1785
select(MachineInstr & I)1786 bool AArch64InstructionSelector::select(MachineInstr &I) {
1787 assert(I.getParent() && "Instruction should be in a basic block!");
1788 assert(I.getParent()->getParent() && "Instruction should be in a function!");
1789
1790 MachineBasicBlock &MBB = *I.getParent();
1791 MachineFunction &MF = *MBB.getParent();
1792 MachineRegisterInfo &MRI = MF.getRegInfo();
1793
1794 const AArch64Subtarget *Subtarget =
1795 &static_cast<const AArch64Subtarget &>(MF.getSubtarget());
1796 if (Subtarget->requiresStrictAlign()) {
1797 // We don't support this feature yet.
1798 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
1799 return false;
1800 }
1801
1802 unsigned Opcode = I.getOpcode();
1803 // G_PHI requires same handling as PHI
1804 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
1805 // Certain non-generic instructions also need some special handling.
1806
1807 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
1808 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1809
1810 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
1811 const Register DefReg = I.getOperand(0).getReg();
1812 const LLT DefTy = MRI.getType(DefReg);
1813
1814 const RegClassOrRegBank &RegClassOrBank =
1815 MRI.getRegClassOrRegBank(DefReg);
1816
1817 const TargetRegisterClass *DefRC
1818 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
1819 if (!DefRC) {
1820 if (!DefTy.isValid()) {
1821 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
1822 return false;
1823 }
1824 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
1825 DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
1826 if (!DefRC) {
1827 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
1828 return false;
1829 }
1830 }
1831
1832 I.setDesc(TII.get(TargetOpcode::PHI));
1833
1834 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
1835 }
1836
1837 if (I.isCopy())
1838 return selectCopy(I, TII, MRI, TRI, RBI);
1839
1840 return true;
1841 }
1842
1843
1844 if (I.getNumOperands() != I.getNumExplicitOperands()) {
1845 LLVM_DEBUG(
1846 dbgs() << "Generic instruction has unexpected implicit operands\n");
1847 return false;
1848 }
1849
1850 // Try to do some lowering before we start instruction selecting. These
1851 // lowerings are purely transformations on the input G_MIR and so selection
1852 // must continue after any modification of the instruction.
1853 if (preISelLower(I)) {
1854 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
1855 }
1856
1857 // There may be patterns where the importer can't deal with them optimally,
1858 // but does select it to a suboptimal sequence so our custom C++ selection
1859 // code later never has a chance to work on it. Therefore, we have an early
1860 // selection attempt here to give priority to certain selection routines
1861 // over the imported ones.
1862 if (earlySelect(I))
1863 return true;
1864
1865 if (selectImpl(I, *CoverageInfo))
1866 return true;
1867
1868 LLT Ty =
1869 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
1870
1871 MachineIRBuilder MIB(I);
1872
1873 switch (Opcode) {
1874 case TargetOpcode::G_BRCOND: {
1875 if (Ty.getSizeInBits() > 32) {
1876 // We shouldn't need this on AArch64, but it would be implemented as an
1877 // EXTRACT_SUBREG followed by a TBNZW because TBNZX has no encoding if the
1878 // bit being tested is < 32.
1879 LLVM_DEBUG(dbgs() << "G_BRCOND has type: " << Ty
1880 << ", expected at most 32-bits");
1881 return false;
1882 }
1883
1884 const Register CondReg = I.getOperand(0).getReg();
1885 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1886
1887 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1888 // instructions will not be produced, as they are conditional branch
1889 // instructions that do not set flags.
1890 if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))
1891 return true;
1892
1893 if (ProduceNonFlagSettingCondBr) {
1894 auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
1895 .addUse(CondReg)
1896 .addImm(/*bit offset=*/0)
1897 .addMBB(DestMBB);
1898
1899 I.eraseFromParent();
1900 return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
1901 } else {
1902 auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
1903 .addDef(AArch64::WZR)
1904 .addUse(CondReg)
1905 .addImm(1);
1906 constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
1907 auto Bcc =
1908 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
1909 .addImm(AArch64CC::EQ)
1910 .addMBB(DestMBB);
1911
1912 I.eraseFromParent();
1913 return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
1914 }
1915 }
1916
1917 case TargetOpcode::G_BRINDIRECT: {
1918 I.setDesc(TII.get(AArch64::BR));
1919 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1920 }
1921
1922 case TargetOpcode::G_BRJT:
1923 return selectBrJT(I, MRI);
1924
1925 case AArch64::G_ADD_LOW: {
1926 // This op may have been separated from it's ADRP companion by the localizer
1927 // or some other code motion pass. Given that many CPUs will try to
1928 // macro fuse these operations anyway, select this into a MOVaddr pseudo
1929 // which will later be expanded into an ADRP+ADD pair after scheduling.
1930 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
1931 if (BaseMI->getOpcode() != AArch64::ADRP) {
1932 I.setDesc(TII.get(AArch64::ADDXri));
1933 I.addOperand(MachineOperand::CreateImm(0));
1934 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1935 }
1936 assert(TM.getCodeModel() == CodeModel::Small &&
1937 "Expected small code model");
1938 MachineIRBuilder MIB(I);
1939 auto Op1 = BaseMI->getOperand(1);
1940 auto Op2 = I.getOperand(2);
1941 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
1942 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
1943 Op1.getTargetFlags())
1944 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
1945 Op2.getTargetFlags());
1946 I.eraseFromParent();
1947 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
1948 }
1949
1950 case TargetOpcode::G_BSWAP: {
1951 // Handle vector types for G_BSWAP directly.
1952 Register DstReg = I.getOperand(0).getReg();
1953 LLT DstTy = MRI.getType(DstReg);
1954
1955 // We should only get vector types here; everything else is handled by the
1956 // importer right now.
1957 if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
1958 LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
1959 return false;
1960 }
1961
1962 // Only handle 4 and 2 element vectors for now.
1963 // TODO: 16-bit elements.
1964 unsigned NumElts = DstTy.getNumElements();
1965 if (NumElts != 4 && NumElts != 2) {
1966 LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
1967 return false;
1968 }
1969
1970 // Choose the correct opcode for the supported types. Right now, that's
1971 // v2s32, v4s32, and v2s64.
1972 unsigned Opc = 0;
1973 unsigned EltSize = DstTy.getElementType().getSizeInBits();
1974 if (EltSize == 32)
1975 Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
1976 : AArch64::REV32v16i8;
1977 else if (EltSize == 64)
1978 Opc = AArch64::REV64v16i8;
1979
1980 // We should always get something by the time we get here...
1981 assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
1982
1983 I.setDesc(TII.get(Opc));
1984 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1985 }
1986
1987 case TargetOpcode::G_FCONSTANT:
1988 case TargetOpcode::G_CONSTANT: {
1989 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
1990
1991 const LLT s8 = LLT::scalar(8);
1992 const LLT s16 = LLT::scalar(16);
1993 const LLT s32 = LLT::scalar(32);
1994 const LLT s64 = LLT::scalar(64);
1995 const LLT p0 = LLT::pointer(0, 64);
1996
1997 const Register DefReg = I.getOperand(0).getReg();
1998 const LLT DefTy = MRI.getType(DefReg);
1999 const unsigned DefSize = DefTy.getSizeInBits();
2000 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2001
2002 // FIXME: Redundant check, but even less readable when factored out.
2003 if (isFP) {
2004 if (Ty != s32 && Ty != s64) {
2005 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2006 << " constant, expected: " << s32 << " or " << s64
2007 << '\n');
2008 return false;
2009 }
2010
2011 if (RB.getID() != AArch64::FPRRegBankID) {
2012 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2013 << " constant on bank: " << RB
2014 << ", expected: FPR\n");
2015 return false;
2016 }
2017
2018 // The case when we have 0.0 is covered by tablegen. Reject it here so we
2019 // can be sure tablegen works correctly and isn't rescued by this code.
2020 if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0))
2021 return false;
2022 } else {
2023 // s32 and s64 are covered by tablegen.
2024 if (Ty != p0 && Ty != s8 && Ty != s16) {
2025 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2026 << " constant, expected: " << s32 << ", " << s64
2027 << ", or " << p0 << '\n');
2028 return false;
2029 }
2030
2031 if (RB.getID() != AArch64::GPRRegBankID) {
2032 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2033 << " constant on bank: " << RB
2034 << ", expected: GPR\n");
2035 return false;
2036 }
2037 }
2038
2039 // We allow G_CONSTANT of types < 32b.
2040 const unsigned MovOpc =
2041 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2042
2043 if (isFP) {
2044 // Either emit a FMOV, or emit a copy to emit a normal mov.
2045 const TargetRegisterClass &GPRRC =
2046 DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
2047 const TargetRegisterClass &FPRRC =
2048 DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass;
2049
2050 // Can we use a FMOV instruction to represent the immediate?
2051 if (emitFMovForFConstant(I, MRI))
2052 return true;
2053
2054 // For 64b values, emit a constant pool load instead.
2055 if (DefSize == 64) {
2056 auto *FPImm = I.getOperand(1).getFPImm();
2057 MachineIRBuilder MIB(I);
2058 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2059 if (!LoadMI) {
2060 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2061 return false;
2062 }
2063 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2064 I.eraseFromParent();
2065 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2066 }
2067
2068 // Nope. Emit a copy and use a normal mov instead.
2069 const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC);
2070 MachineOperand &RegOp = I.getOperand(0);
2071 RegOp.setReg(DefGPRReg);
2072 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2073 MIB.buildCopy({DefReg}, {DefGPRReg});
2074
2075 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2076 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2077 return false;
2078 }
2079
2080 MachineOperand &ImmOp = I.getOperand(1);
2081 // FIXME: Is going through int64_t always correct?
2082 ImmOp.ChangeToImmediate(
2083 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2084 } else if (I.getOperand(1).isCImm()) {
2085 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2086 I.getOperand(1).ChangeToImmediate(Val);
2087 } else if (I.getOperand(1).isImm()) {
2088 uint64_t Val = I.getOperand(1).getImm();
2089 I.getOperand(1).ChangeToImmediate(Val);
2090 }
2091
2092 I.setDesc(TII.get(MovOpc));
2093 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2094 return true;
2095 }
2096 case TargetOpcode::G_EXTRACT: {
2097 Register DstReg = I.getOperand(0).getReg();
2098 Register SrcReg = I.getOperand(1).getReg();
2099 LLT SrcTy = MRI.getType(SrcReg);
2100 LLT DstTy = MRI.getType(DstReg);
2101 (void)DstTy;
2102 unsigned SrcSize = SrcTy.getSizeInBits();
2103
2104 if (SrcTy.getSizeInBits() > 64) {
2105 // This should be an extract of an s128, which is like a vector extract.
2106 if (SrcTy.getSizeInBits() != 128)
2107 return false;
2108 // Only support extracting 64 bits from an s128 at the moment.
2109 if (DstTy.getSizeInBits() != 64)
2110 return false;
2111
2112 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2113 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2114 // Check we have the right regbank always.
2115 assert(SrcRB.getID() == AArch64::FPRRegBankID &&
2116 DstRB.getID() == AArch64::FPRRegBankID &&
2117 "Wrong extract regbank!");
2118 (void)SrcRB;
2119
2120 // Emit the same code as a vector extract.
2121 // Offset must be a multiple of 64.
2122 unsigned Offset = I.getOperand(2).getImm();
2123 if (Offset % 64 != 0)
2124 return false;
2125 unsigned LaneIdx = Offset / 64;
2126 MachineIRBuilder MIB(I);
2127 MachineInstr *Extract = emitExtractVectorElt(
2128 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2129 if (!Extract)
2130 return false;
2131 I.eraseFromParent();
2132 return true;
2133 }
2134
2135 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2136 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2137 Ty.getSizeInBits() - 1);
2138
2139 if (SrcSize < 64) {
2140 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2141 "unexpected G_EXTRACT types");
2142 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2143 }
2144
2145 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2146 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2147 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2148 .addReg(DstReg, 0, AArch64::sub_32);
2149 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2150 AArch64::GPR32RegClass, MRI);
2151 I.getOperand(0).setReg(DstReg);
2152
2153 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2154 }
2155
2156 case TargetOpcode::G_INSERT: {
2157 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2158 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2159 unsigned DstSize = DstTy.getSizeInBits();
2160 // Larger inserts are vectors, same-size ones should be something else by
2161 // now (split up or turned into COPYs).
2162 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2163 return false;
2164
2165 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2166 unsigned LSB = I.getOperand(3).getImm();
2167 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2168 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2169 MachineInstrBuilder(MF, I).addImm(Width - 1);
2170
2171 if (DstSize < 64) {
2172 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2173 "unexpected G_INSERT types");
2174 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2175 }
2176
2177 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2178 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2179 TII.get(AArch64::SUBREG_TO_REG))
2180 .addDef(SrcReg)
2181 .addImm(0)
2182 .addUse(I.getOperand(2).getReg())
2183 .addImm(AArch64::sub_32);
2184 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2185 AArch64::GPR32RegClass, MRI);
2186 I.getOperand(2).setReg(SrcReg);
2187
2188 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2189 }
2190 case TargetOpcode::G_FRAME_INDEX: {
2191 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2192 if (Ty != LLT::pointer(0, 64)) {
2193 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2194 << ", expected: " << LLT::pointer(0, 64) << '\n');
2195 return false;
2196 }
2197 I.setDesc(TII.get(AArch64::ADDXri));
2198
2199 // MOs for a #0 shifted immediate.
2200 I.addOperand(MachineOperand::CreateImm(0));
2201 I.addOperand(MachineOperand::CreateImm(0));
2202
2203 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2204 }
2205
2206 case TargetOpcode::G_GLOBAL_VALUE: {
2207 auto GV = I.getOperand(1).getGlobal();
2208 if (GV->isThreadLocal())
2209 return selectTLSGlobalValue(I, MRI);
2210
2211 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
2212 if (OpFlags & AArch64II::MO_GOT) {
2213 I.setDesc(TII.get(AArch64::LOADgot));
2214 I.getOperand(1).setTargetFlags(OpFlags);
2215 } else if (TM.getCodeModel() == CodeModel::Large) {
2216 // Materialize the global using movz/movk instructions.
2217 materializeLargeCMVal(I, GV, OpFlags);
2218 I.eraseFromParent();
2219 return true;
2220 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2221 I.setDesc(TII.get(AArch64::ADR));
2222 I.getOperand(1).setTargetFlags(OpFlags);
2223 } else {
2224 I.setDesc(TII.get(AArch64::MOVaddr));
2225 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2226 MachineInstrBuilder MIB(MF, I);
2227 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2228 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2229 }
2230 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2231 }
2232
2233 case TargetOpcode::G_ZEXTLOAD:
2234 case TargetOpcode::G_LOAD:
2235 case TargetOpcode::G_STORE: {
2236 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2237 MachineIRBuilder MIB(I);
2238
2239 LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
2240
2241 if (PtrTy != LLT::pointer(0, 64)) {
2242 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
2243 << ", expected: " << LLT::pointer(0, 64) << '\n');
2244 return false;
2245 }
2246
2247 auto &MemOp = **I.memoperands_begin();
2248 if (MemOp.isAtomic()) {
2249 // For now we just support s8 acquire loads to be able to compile stack
2250 // protector code.
2251 if (MemOp.getOrdering() == AtomicOrdering::Acquire &&
2252 MemOp.getSize() == 1) {
2253 I.setDesc(TII.get(AArch64::LDARB));
2254 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2255 }
2256 LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n");
2257 return false;
2258 }
2259 unsigned MemSizeInBits = MemOp.getSize() * 8;
2260
2261 const Register PtrReg = I.getOperand(1).getReg();
2262 #ifndef NDEBUG
2263 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2264 // Sanity-check the pointer register.
2265 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2266 "Load/Store pointer operand isn't a GPR");
2267 assert(MRI.getType(PtrReg).isPointer() &&
2268 "Load/Store pointer operand isn't a pointer");
2269 #endif
2270
2271 const Register ValReg = I.getOperand(0).getReg();
2272 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2273
2274 const unsigned NewOpc =
2275 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
2276 if (NewOpc == I.getOpcode())
2277 return false;
2278
2279 I.setDesc(TII.get(NewOpc));
2280
2281 uint64_t Offset = 0;
2282 auto *PtrMI = MRI.getVRegDef(PtrReg);
2283
2284 // Try to fold a GEP into our unsigned immediate addressing mode.
2285 if (PtrMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
2286 if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) {
2287 int64_t Imm = *COff;
2288 const unsigned Size = MemSizeInBits / 8;
2289 const unsigned Scale = Log2_32(Size);
2290 if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) {
2291 Register Ptr2Reg = PtrMI->getOperand(1).getReg();
2292 I.getOperand(1).setReg(Ptr2Reg);
2293 PtrMI = MRI.getVRegDef(Ptr2Reg);
2294 Offset = Imm / Size;
2295 }
2296 }
2297 }
2298
2299 // If we haven't folded anything into our addressing mode yet, try to fold
2300 // a frame index into the base+offset.
2301 if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX)
2302 I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex());
2303
2304 I.addOperand(MachineOperand::CreateImm(Offset));
2305
2306 // If we're storing a 0, use WZR/XZR.
2307 if (auto CVal = getConstantVRegVal(ValReg, MRI)) {
2308 if (*CVal == 0 && Opcode == TargetOpcode::G_STORE) {
2309 if (I.getOpcode() == AArch64::STRWui)
2310 I.getOperand(0).setReg(AArch64::WZR);
2311 else if (I.getOpcode() == AArch64::STRXui)
2312 I.getOperand(0).setReg(AArch64::XZR);
2313 }
2314 }
2315
2316 if (IsZExtLoad) {
2317 // The zextload from a smaller type to i32 should be handled by the importer.
2318 if (MRI.getType(ValReg).getSizeInBits() != 64)
2319 return false;
2320 // If we have a ZEXTLOAD then change the load's type to be a narrower reg
2321 //and zero_extend with SUBREG_TO_REG.
2322 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2323 Register DstReg = I.getOperand(0).getReg();
2324 I.getOperand(0).setReg(LdReg);
2325
2326 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2327 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
2328 .addImm(0)
2329 .addUse(LdReg)
2330 .addImm(AArch64::sub_32);
2331 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2332 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
2333 MRI);
2334 }
2335 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2336 }
2337
2338 case TargetOpcode::G_SMULH:
2339 case TargetOpcode::G_UMULH: {
2340 // Reject the various things we don't support yet.
2341 if (unsupportedBinOp(I, RBI, MRI, TRI))
2342 return false;
2343
2344 const Register DefReg = I.getOperand(0).getReg();
2345 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2346
2347 if (RB.getID() != AArch64::GPRRegBankID) {
2348 LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
2349 return false;
2350 }
2351
2352 if (Ty != LLT::scalar(64)) {
2353 LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
2354 << ", expected: " << LLT::scalar(64) << '\n');
2355 return false;
2356 }
2357
2358 unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
2359 : AArch64::UMULHrr;
2360 I.setDesc(TII.get(NewOpc));
2361
2362 // Now that we selected an opcode, we need to constrain the register
2363 // operands to use appropriate classes.
2364 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2365 }
2366 case TargetOpcode::G_FADD:
2367 case TargetOpcode::G_FSUB:
2368 case TargetOpcode::G_FMUL:
2369 case TargetOpcode::G_FDIV:
2370
2371 case TargetOpcode::G_ASHR:
2372 if (MRI.getType(I.getOperand(0).getReg()).isVector())
2373 return selectVectorASHR(I, MRI);
2374 LLVM_FALLTHROUGH;
2375 case TargetOpcode::G_SHL:
2376 if (Opcode == TargetOpcode::G_SHL &&
2377 MRI.getType(I.getOperand(0).getReg()).isVector())
2378 return selectVectorSHL(I, MRI);
2379 LLVM_FALLTHROUGH;
2380 case TargetOpcode::G_OR:
2381 case TargetOpcode::G_LSHR: {
2382 // Reject the various things we don't support yet.
2383 if (unsupportedBinOp(I, RBI, MRI, TRI))
2384 return false;
2385
2386 const unsigned OpSize = Ty.getSizeInBits();
2387
2388 const Register DefReg = I.getOperand(0).getReg();
2389 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2390
2391 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
2392 if (NewOpc == I.getOpcode())
2393 return false;
2394
2395 I.setDesc(TII.get(NewOpc));
2396 // FIXME: Should the type be always reset in setDesc?
2397
2398 // Now that we selected an opcode, we need to constrain the register
2399 // operands to use appropriate classes.
2400 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2401 }
2402
2403 case TargetOpcode::G_PTR_ADD: {
2404 MachineIRBuilder MIRBuilder(I);
2405 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2),
2406 MIRBuilder);
2407 I.eraseFromParent();
2408 return true;
2409 }
2410 case TargetOpcode::G_UADDO: {
2411 // TODO: Support other types.
2412 unsigned OpSize = Ty.getSizeInBits();
2413 if (OpSize != 32 && OpSize != 64) {
2414 LLVM_DEBUG(
2415 dbgs()
2416 << "G_UADDO currently only supported for 32 and 64 b types.\n");
2417 return false;
2418 }
2419
2420 // TODO: Support vectors.
2421 if (Ty.isVector()) {
2422 LLVM_DEBUG(dbgs() << "G_UADDO currently only supported for scalars.\n");
2423 return false;
2424 }
2425
2426 // Add and set the set condition flag.
2427 unsigned AddsOpc = OpSize == 32 ? AArch64::ADDSWrr : AArch64::ADDSXrr;
2428 MachineIRBuilder MIRBuilder(I);
2429 auto AddsMI = MIRBuilder.buildInstr(AddsOpc, {I.getOperand(0)},
2430 {I.getOperand(2), I.getOperand(3)});
2431 constrainSelectedInstRegOperands(*AddsMI, TII, TRI, RBI);
2432
2433 // Now, put the overflow result in the register given by the first operand
2434 // to the G_UADDO. CSINC increments the result when the predicate is false,
2435 // so to get the increment when it's true, we need to use the inverse. In
2436 // this case, we want to increment when carry is set.
2437 auto CsetMI = MIRBuilder
2438 .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
2439 {Register(AArch64::WZR), Register(AArch64::WZR)})
2440 .addImm(getInvertedCondCode(AArch64CC::HS));
2441 constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
2442 I.eraseFromParent();
2443 return true;
2444 }
2445
2446 case TargetOpcode::G_PTRMASK: {
2447 Register MaskReg = I.getOperand(2).getReg();
2448 Optional<int64_t> MaskVal = getConstantVRegVal(MaskReg, MRI);
2449 // TODO: Implement arbitrary cases
2450 if (!MaskVal || !isShiftedMask_64(*MaskVal))
2451 return false;
2452
2453 uint64_t Mask = *MaskVal;
2454 I.setDesc(TII.get(AArch64::ANDXri));
2455 I.getOperand(2).ChangeToImmediate(
2456 AArch64_AM::encodeLogicalImmediate(Mask, 64));
2457
2458 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2459 }
2460 case TargetOpcode::G_PTRTOINT:
2461 case TargetOpcode::G_TRUNC: {
2462 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2463 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
2464
2465 const Register DstReg = I.getOperand(0).getReg();
2466 const Register SrcReg = I.getOperand(1).getReg();
2467
2468 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2469 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2470
2471 if (DstRB.getID() != SrcRB.getID()) {
2472 LLVM_DEBUG(
2473 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
2474 return false;
2475 }
2476
2477 if (DstRB.getID() == AArch64::GPRRegBankID) {
2478 const TargetRegisterClass *DstRC =
2479 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
2480 if (!DstRC)
2481 return false;
2482
2483 const TargetRegisterClass *SrcRC =
2484 getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
2485 if (!SrcRC)
2486 return false;
2487
2488 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
2489 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
2490 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
2491 return false;
2492 }
2493
2494 if (DstRC == SrcRC) {
2495 // Nothing to be done
2496 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
2497 SrcTy == LLT::scalar(64)) {
2498 llvm_unreachable("TableGen can import this case");
2499 return false;
2500 } else if (DstRC == &AArch64::GPR32RegClass &&
2501 SrcRC == &AArch64::GPR64RegClass) {
2502 I.getOperand(1).setSubReg(AArch64::sub_32);
2503 } else {
2504 LLVM_DEBUG(
2505 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
2506 return false;
2507 }
2508
2509 I.setDesc(TII.get(TargetOpcode::COPY));
2510 return true;
2511 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
2512 if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
2513 I.setDesc(TII.get(AArch64::XTNv4i16));
2514 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2515 return true;
2516 }
2517
2518 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
2519 MachineIRBuilder MIB(I);
2520 MachineInstr *Extract = emitExtractVectorElt(
2521 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
2522 if (!Extract)
2523 return false;
2524 I.eraseFromParent();
2525 return true;
2526 }
2527
2528 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
2529 if (Opcode == TargetOpcode::G_PTRTOINT) {
2530 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
2531 I.setDesc(TII.get(TargetOpcode::COPY));
2532 return true;
2533 }
2534 }
2535
2536 return false;
2537 }
2538
2539 case TargetOpcode::G_ANYEXT: {
2540 const Register DstReg = I.getOperand(0).getReg();
2541 const Register SrcReg = I.getOperand(1).getReg();
2542
2543 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
2544 if (RBDst.getID() != AArch64::GPRRegBankID) {
2545 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
2546 << ", expected: GPR\n");
2547 return false;
2548 }
2549
2550 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
2551 if (RBSrc.getID() != AArch64::GPRRegBankID) {
2552 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
2553 << ", expected: GPR\n");
2554 return false;
2555 }
2556
2557 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
2558
2559 if (DstSize == 0) {
2560 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
2561 return false;
2562 }
2563
2564 if (DstSize != 64 && DstSize > 32) {
2565 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
2566 << ", expected: 32 or 64\n");
2567 return false;
2568 }
2569 // At this point G_ANYEXT is just like a plain COPY, but we need
2570 // to explicitly form the 64-bit value if any.
2571 if (DstSize > 32) {
2572 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
2573 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
2574 .addDef(ExtSrc)
2575 .addImm(0)
2576 .addUse(SrcReg)
2577 .addImm(AArch64::sub_32);
2578 I.getOperand(1).setReg(ExtSrc);
2579 }
2580 return selectCopy(I, TII, MRI, TRI, RBI);
2581 }
2582
2583 case TargetOpcode::G_ZEXT:
2584 case TargetOpcode::G_SEXT_INREG:
2585 case TargetOpcode::G_SEXT: {
2586 unsigned Opcode = I.getOpcode();
2587 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
2588 const Register DefReg = I.getOperand(0).getReg();
2589 Register SrcReg = I.getOperand(1).getReg();
2590 const LLT DstTy = MRI.getType(DefReg);
2591 const LLT SrcTy = MRI.getType(SrcReg);
2592 unsigned DstSize = DstTy.getSizeInBits();
2593 unsigned SrcSize = SrcTy.getSizeInBits();
2594
2595 // SEXT_INREG has the same src reg size as dst, the size of the value to be
2596 // extended is encoded in the imm.
2597 if (Opcode == TargetOpcode::G_SEXT_INREG)
2598 SrcSize = I.getOperand(2).getImm();
2599
2600 if (DstTy.isVector())
2601 return false; // Should be handled by imported patterns.
2602
2603 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
2604 AArch64::GPRRegBankID &&
2605 "Unexpected ext regbank");
2606
2607 MachineIRBuilder MIB(I);
2608 MachineInstr *ExtI;
2609
2610 // First check if we're extending the result of a load which has a dest type
2611 // smaller than 32 bits, then this zext is redundant. GPR32 is the smallest
2612 // GPR register on AArch64 and all loads which are smaller automatically
2613 // zero-extend the upper bits. E.g.
2614 // %v(s8) = G_LOAD %p, :: (load 1)
2615 // %v2(s32) = G_ZEXT %v(s8)
2616 if (!IsSigned) {
2617 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
2618 bool IsGPR =
2619 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
2620 if (LoadMI && IsGPR) {
2621 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
2622 unsigned BytesLoaded = MemOp->getSize();
2623 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
2624 return selectCopy(I, TII, MRI, TRI, RBI);
2625 }
2626
2627 // If we are zero extending from 32 bits to 64 bits, it's possible that
2628 // the instruction implicitly does the zero extend for us. In that case,
2629 // we can just emit a SUBREG_TO_REG.
2630 if (IsGPR && SrcSize == 32 && DstSize == 64) {
2631 // Unlike with the G_LOAD case, we don't want to look through copies
2632 // here.
2633 MachineInstr *Def = MRI.getVRegDef(SrcReg);
2634 if (Def && isDef32(*Def)) {
2635 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
2636 .addImm(0)
2637 .addUse(SrcReg)
2638 .addImm(AArch64::sub_32);
2639
2640 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
2641 MRI)) {
2642 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
2643 return false;
2644 }
2645
2646 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
2647 MRI)) {
2648 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
2649 return false;
2650 }
2651
2652 I.eraseFromParent();
2653 return true;
2654 }
2655 }
2656 }
2657
2658 if (DstSize == 64) {
2659 if (Opcode != TargetOpcode::G_SEXT_INREG) {
2660 // FIXME: Can we avoid manually doing this?
2661 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
2662 MRI)) {
2663 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
2664 << " operand\n");
2665 return false;
2666 }
2667 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
2668 {&AArch64::GPR64RegClass}, {})
2669 .addImm(0)
2670 .addUse(SrcReg)
2671 .addImm(AArch64::sub_32)
2672 .getReg(0);
2673 }
2674
2675 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
2676 {DefReg}, {SrcReg})
2677 .addImm(0)
2678 .addImm(SrcSize - 1);
2679 } else if (DstSize <= 32) {
2680 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
2681 {DefReg}, {SrcReg})
2682 .addImm(0)
2683 .addImm(SrcSize - 1);
2684 } else {
2685 return false;
2686 }
2687
2688 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
2689 I.eraseFromParent();
2690 return true;
2691 }
2692
2693 case TargetOpcode::G_SITOFP:
2694 case TargetOpcode::G_UITOFP:
2695 case TargetOpcode::G_FPTOSI:
2696 case TargetOpcode::G_FPTOUI: {
2697 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
2698 SrcTy = MRI.getType(I.getOperand(1).getReg());
2699 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
2700 if (NewOpc == Opcode)
2701 return false;
2702
2703 I.setDesc(TII.get(NewOpc));
2704 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2705
2706 return true;
2707 }
2708
2709 case TargetOpcode::G_FREEZE:
2710 return selectCopy(I, TII, MRI, TRI, RBI);
2711
2712 case TargetOpcode::G_INTTOPTR:
2713 // The importer is currently unable to import pointer types since they
2714 // didn't exist in SelectionDAG.
2715 return selectCopy(I, TII, MRI, TRI, RBI);
2716
2717 case TargetOpcode::G_BITCAST:
2718 // Imported SelectionDAG rules can handle every bitcast except those that
2719 // bitcast from a type to the same type. Ideally, these shouldn't occur
2720 // but we might not run an optimizer that deletes them. The other exception
2721 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
2722 // of them.
2723 return selectCopy(I, TII, MRI, TRI, RBI);
2724
2725 case TargetOpcode::G_SELECT: {
2726 if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
2727 LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
2728 << ", expected: " << LLT::scalar(1) << '\n');
2729 return false;
2730 }
2731
2732 const Register CondReg = I.getOperand(1).getReg();
2733 const Register TReg = I.getOperand(2).getReg();
2734 const Register FReg = I.getOperand(3).getReg();
2735
2736 if (tryOptSelect(I))
2737 return true;
2738
2739 Register CSelOpc = selectSelectOpc(I, MRI, RBI);
2740 MachineInstr &TstMI =
2741 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
2742 .addDef(AArch64::WZR)
2743 .addUse(CondReg)
2744 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2745
2746 MachineInstr &CSelMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CSelOpc))
2747 .addDef(I.getOperand(0).getReg())
2748 .addUse(TReg)
2749 .addUse(FReg)
2750 .addImm(AArch64CC::NE);
2751
2752 constrainSelectedInstRegOperands(TstMI, TII, TRI, RBI);
2753 constrainSelectedInstRegOperands(CSelMI, TII, TRI, RBI);
2754
2755 I.eraseFromParent();
2756 return true;
2757 }
2758 case TargetOpcode::G_ICMP: {
2759 if (Ty.isVector())
2760 return selectVectorICmp(I, MRI);
2761
2762 if (Ty != LLT::scalar(32)) {
2763 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
2764 << ", expected: " << LLT::scalar(32) << '\n');
2765 return false;
2766 }
2767
2768 MachineIRBuilder MIRBuilder(I);
2769 MachineInstr *Cmp;
2770 CmpInst::Predicate Pred;
2771 std::tie(Cmp, Pred) = emitIntegerCompare(I.getOperand(2), I.getOperand(3),
2772 I.getOperand(1), MIRBuilder);
2773 if (!Cmp)
2774 return false;
2775 emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIRBuilder);
2776 I.eraseFromParent();
2777 return true;
2778 }
2779
2780 case TargetOpcode::G_FCMP: {
2781 if (Ty != LLT::scalar(32)) {
2782 LLVM_DEBUG(dbgs() << "G_FCMP result has type: " << Ty
2783 << ", expected: " << LLT::scalar(32) << '\n');
2784 return false;
2785 }
2786
2787 unsigned CmpOpc = selectFCMPOpc(I, MRI);
2788 if (!CmpOpc)
2789 return false;
2790
2791 // FIXME: regbank
2792
2793 AArch64CC::CondCode CC1, CC2;
2794 changeFCMPPredToAArch64CC(
2795 (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);
2796
2797 // Partially build the compare. Decide if we need to add a use for the
2798 // third operand based off whether or not we're comparing against 0.0.
2799 auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
2800 .addUse(I.getOperand(2).getReg());
2801
2802 // If we don't have an immediate compare, then we need to add a use of the
2803 // register which wasn't used for the immediate.
2804 // Note that the immediate will always be the last operand.
2805 if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
2806 CmpMI = CmpMI.addUse(I.getOperand(3).getReg());
2807
2808 const Register DefReg = I.getOperand(0).getReg();
2809 Register Def1Reg = DefReg;
2810 if (CC2 != AArch64CC::AL)
2811 Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2812
2813 MachineInstr &CSetMI =
2814 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
2815 .addDef(Def1Reg)
2816 .addUse(AArch64::WZR)
2817 .addUse(AArch64::WZR)
2818 .addImm(getInvertedCondCode(CC1));
2819
2820 if (CC2 != AArch64CC::AL) {
2821 Register Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2822 MachineInstr &CSet2MI =
2823 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
2824 .addDef(Def2Reg)
2825 .addUse(AArch64::WZR)
2826 .addUse(AArch64::WZR)
2827 .addImm(getInvertedCondCode(CC2));
2828 MachineInstr &OrMI =
2829 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr))
2830 .addDef(DefReg)
2831 .addUse(Def1Reg)
2832 .addUse(Def2Reg);
2833 constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI);
2834 constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI);
2835 }
2836 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
2837 constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
2838
2839 I.eraseFromParent();
2840 return true;
2841 }
2842 case TargetOpcode::G_VASTART:
2843 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
2844 : selectVaStartAAPCS(I, MF, MRI);
2845 case TargetOpcode::G_INTRINSIC:
2846 return selectIntrinsic(I, MRI);
2847 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
2848 return selectIntrinsicWithSideEffects(I, MRI);
2849 case TargetOpcode::G_IMPLICIT_DEF: {
2850 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
2851 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2852 const Register DstReg = I.getOperand(0).getReg();
2853 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2854 const TargetRegisterClass *DstRC =
2855 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
2856 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
2857 return true;
2858 }
2859 case TargetOpcode::G_BLOCK_ADDR: {
2860 if (TM.getCodeModel() == CodeModel::Large) {
2861 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
2862 I.eraseFromParent();
2863 return true;
2864 } else {
2865 I.setDesc(TII.get(AArch64::MOVaddrBA));
2866 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
2867 I.getOperand(0).getReg())
2868 .addBlockAddress(I.getOperand(1).getBlockAddress(),
2869 /* Offset */ 0, AArch64II::MO_PAGE)
2870 .addBlockAddress(
2871 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
2872 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
2873 I.eraseFromParent();
2874 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
2875 }
2876 }
2877 case TargetOpcode::G_INTRINSIC_TRUNC:
2878 return selectIntrinsicTrunc(I, MRI);
2879 case TargetOpcode::G_INTRINSIC_ROUND:
2880 return selectIntrinsicRound(I, MRI);
2881 case TargetOpcode::G_BUILD_VECTOR:
2882 return selectBuildVector(I, MRI);
2883 case TargetOpcode::G_MERGE_VALUES:
2884 return selectMergeValues(I, MRI);
2885 case TargetOpcode::G_UNMERGE_VALUES:
2886 return selectUnmergeValues(I, MRI);
2887 case TargetOpcode::G_SHUFFLE_VECTOR:
2888 return selectShuffleVector(I, MRI);
2889 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
2890 return selectExtractElt(I, MRI);
2891 case TargetOpcode::G_INSERT_VECTOR_ELT:
2892 return selectInsertElt(I, MRI);
2893 case TargetOpcode::G_CONCAT_VECTORS:
2894 return selectConcatVectors(I, MRI);
2895 case TargetOpcode::G_JUMP_TABLE:
2896 return selectJumpTable(I, MRI);
2897 }
2898
2899 return false;
2900 }
2901
selectBrJT(MachineInstr & I,MachineRegisterInfo & MRI) const2902 bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
2903 MachineRegisterInfo &MRI) const {
2904 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
2905 Register JTAddr = I.getOperand(0).getReg();
2906 unsigned JTI = I.getOperand(1).getIndex();
2907 Register Index = I.getOperand(2).getReg();
2908 MachineIRBuilder MIB(I);
2909
2910 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2911 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
2912 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
2913 {TargetReg, ScratchReg}, {JTAddr, Index})
2914 .addJumpTableIndex(JTI);
2915 // Build the indirect branch.
2916 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
2917 I.eraseFromParent();
2918 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
2919 }
2920
selectJumpTable(MachineInstr & I,MachineRegisterInfo & MRI) const2921 bool AArch64InstructionSelector::selectJumpTable(
2922 MachineInstr &I, MachineRegisterInfo &MRI) const {
2923 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
2924 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
2925
2926 Register DstReg = I.getOperand(0).getReg();
2927 unsigned JTI = I.getOperand(1).getIndex();
2928 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
2929 MachineIRBuilder MIB(I);
2930 auto MovMI =
2931 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
2932 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
2933 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
2934 I.eraseFromParent();
2935 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
2936 }
2937
selectTLSGlobalValue(MachineInstr & I,MachineRegisterInfo & MRI) const2938 bool AArch64InstructionSelector::selectTLSGlobalValue(
2939 MachineInstr &I, MachineRegisterInfo &MRI) const {
2940 if (!STI.isTargetMachO())
2941 return false;
2942 MachineFunction &MF = *I.getParent()->getParent();
2943 MF.getFrameInfo().setAdjustsStack(true);
2944
2945 const GlobalValue &GV = *I.getOperand(1).getGlobal();
2946 MachineIRBuilder MIB(I);
2947
2948 MIB.buildInstr(AArch64::LOADgot, {AArch64::X0}, {})
2949 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
2950
2951 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
2952 {Register(AArch64::X0)})
2953 .addImm(0);
2954
2955 // TLS calls preserve all registers except those that absolutely must be
2956 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
2957 // silly).
2958 MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
2959 .addDef(AArch64::X0, RegState::Implicit)
2960 .addRegMask(TRI.getTLSCallPreservedMask());
2961
2962 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
2963 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
2964 MRI);
2965 I.eraseFromParent();
2966 return true;
2967 }
2968
selectIntrinsicTrunc(MachineInstr & I,MachineRegisterInfo & MRI) const2969 bool AArch64InstructionSelector::selectIntrinsicTrunc(
2970 MachineInstr &I, MachineRegisterInfo &MRI) const {
2971 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
2972
2973 // Select the correct opcode.
2974 unsigned Opc = 0;
2975 if (!SrcTy.isVector()) {
2976 switch (SrcTy.getSizeInBits()) {
2977 default:
2978 case 16:
2979 Opc = AArch64::FRINTZHr;
2980 break;
2981 case 32:
2982 Opc = AArch64::FRINTZSr;
2983 break;
2984 case 64:
2985 Opc = AArch64::FRINTZDr;
2986 break;
2987 }
2988 } else {
2989 unsigned NumElts = SrcTy.getNumElements();
2990 switch (SrcTy.getElementType().getSizeInBits()) {
2991 default:
2992 break;
2993 case 16:
2994 if (NumElts == 4)
2995 Opc = AArch64::FRINTZv4f16;
2996 else if (NumElts == 8)
2997 Opc = AArch64::FRINTZv8f16;
2998 break;
2999 case 32:
3000 if (NumElts == 2)
3001 Opc = AArch64::FRINTZv2f32;
3002 else if (NumElts == 4)
3003 Opc = AArch64::FRINTZv4f32;
3004 break;
3005 case 64:
3006 if (NumElts == 2)
3007 Opc = AArch64::FRINTZv2f64;
3008 break;
3009 }
3010 }
3011
3012 if (!Opc) {
3013 // Didn't get an opcode above, bail.
3014 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
3015 return false;
3016 }
3017
3018 // Legalization would have set us up perfectly for this; we just need to
3019 // set the opcode and move on.
3020 I.setDesc(TII.get(Opc));
3021 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3022 }
3023
selectIntrinsicRound(MachineInstr & I,MachineRegisterInfo & MRI) const3024 bool AArch64InstructionSelector::selectIntrinsicRound(
3025 MachineInstr &I, MachineRegisterInfo &MRI) const {
3026 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3027
3028 // Select the correct opcode.
3029 unsigned Opc = 0;
3030 if (!SrcTy.isVector()) {
3031 switch (SrcTy.getSizeInBits()) {
3032 default:
3033 case 16:
3034 Opc = AArch64::FRINTAHr;
3035 break;
3036 case 32:
3037 Opc = AArch64::FRINTASr;
3038 break;
3039 case 64:
3040 Opc = AArch64::FRINTADr;
3041 break;
3042 }
3043 } else {
3044 unsigned NumElts = SrcTy.getNumElements();
3045 switch (SrcTy.getElementType().getSizeInBits()) {
3046 default:
3047 break;
3048 case 16:
3049 if (NumElts == 4)
3050 Opc = AArch64::FRINTAv4f16;
3051 else if (NumElts == 8)
3052 Opc = AArch64::FRINTAv8f16;
3053 break;
3054 case 32:
3055 if (NumElts == 2)
3056 Opc = AArch64::FRINTAv2f32;
3057 else if (NumElts == 4)
3058 Opc = AArch64::FRINTAv4f32;
3059 break;
3060 case 64:
3061 if (NumElts == 2)
3062 Opc = AArch64::FRINTAv2f64;
3063 break;
3064 }
3065 }
3066
3067 if (!Opc) {
3068 // Didn't get an opcode above, bail.
3069 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
3070 return false;
3071 }
3072
3073 // Legalization would have set us up perfectly for this; we just need to
3074 // set the opcode and move on.
3075 I.setDesc(TII.get(Opc));
3076 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3077 }
3078
selectVectorICmp(MachineInstr & I,MachineRegisterInfo & MRI) const3079 bool AArch64InstructionSelector::selectVectorICmp(
3080 MachineInstr &I, MachineRegisterInfo &MRI) const {
3081 Register DstReg = I.getOperand(0).getReg();
3082 LLT DstTy = MRI.getType(DstReg);
3083 Register SrcReg = I.getOperand(2).getReg();
3084 Register Src2Reg = I.getOperand(3).getReg();
3085 LLT SrcTy = MRI.getType(SrcReg);
3086
3087 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
3088 unsigned NumElts = DstTy.getNumElements();
3089
3090 // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
3091 // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
3092 // Third index is cc opcode:
3093 // 0 == eq
3094 // 1 == ugt
3095 // 2 == uge
3096 // 3 == ult
3097 // 4 == ule
3098 // 5 == sgt
3099 // 6 == sge
3100 // 7 == slt
3101 // 8 == sle
3102 // ne is done by negating 'eq' result.
3103
3104 // This table below assumes that for some comparisons the operands will be
3105 // commuted.
3106 // ult op == commute + ugt op
3107 // ule op == commute + uge op
3108 // slt op == commute + sgt op
3109 // sle op == commute + sge op
3110 unsigned PredIdx = 0;
3111 bool SwapOperands = false;
3112 CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
3113 switch (Pred) {
3114 case CmpInst::ICMP_NE:
3115 case CmpInst::ICMP_EQ:
3116 PredIdx = 0;
3117 break;
3118 case CmpInst::ICMP_UGT:
3119 PredIdx = 1;
3120 break;
3121 case CmpInst::ICMP_UGE:
3122 PredIdx = 2;
3123 break;
3124 case CmpInst::ICMP_ULT:
3125 PredIdx = 3;
3126 SwapOperands = true;
3127 break;
3128 case CmpInst::ICMP_ULE:
3129 PredIdx = 4;
3130 SwapOperands = true;
3131 break;
3132 case CmpInst::ICMP_SGT:
3133 PredIdx = 5;
3134 break;
3135 case CmpInst::ICMP_SGE:
3136 PredIdx = 6;
3137 break;
3138 case CmpInst::ICMP_SLT:
3139 PredIdx = 7;
3140 SwapOperands = true;
3141 break;
3142 case CmpInst::ICMP_SLE:
3143 PredIdx = 8;
3144 SwapOperands = true;
3145 break;
3146 default:
3147 llvm_unreachable("Unhandled icmp predicate");
3148 return false;
3149 }
3150
3151 // This table obviously should be tablegen'd when we have our GISel native
3152 // tablegen selector.
3153
3154 static const unsigned OpcTable[4][4][9] = {
3155 {
3156 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3157 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3158 0 /* invalid */},
3159 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3160 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3161 0 /* invalid */},
3162 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
3163 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
3164 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
3165 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
3166 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
3167 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
3168 },
3169 {
3170 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3171 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3172 0 /* invalid */},
3173 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
3174 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
3175 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
3176 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
3177 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
3178 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
3179 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3180 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3181 0 /* invalid */}
3182 },
3183 {
3184 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
3185 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
3186 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
3187 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
3188 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
3189 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
3190 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3191 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3192 0 /* invalid */},
3193 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3194 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3195 0 /* invalid */}
3196 },
3197 {
3198 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
3199 AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
3200 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
3201 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3202 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3203 0 /* invalid */},
3204 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3205 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3206 0 /* invalid */},
3207 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3208 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3209 0 /* invalid */}
3210 },
3211 };
3212 unsigned EltIdx = Log2_32(SrcEltSize / 8);
3213 unsigned NumEltsIdx = Log2_32(NumElts / 2);
3214 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
3215 if (!Opc) {
3216 LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
3217 return false;
3218 }
3219
3220 const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3221 const TargetRegisterClass *SrcRC =
3222 getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
3223 if (!SrcRC) {
3224 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3225 return false;
3226 }
3227
3228 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
3229 if (SrcTy.getSizeInBits() == 128)
3230 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
3231
3232 if (SwapOperands)
3233 std::swap(SrcReg, Src2Reg);
3234
3235 MachineIRBuilder MIB(I);
3236 auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
3237 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3238
3239 // Invert if we had a 'ne' cc.
3240 if (NotOpc) {
3241 Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
3242 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3243 } else {
3244 MIB.buildCopy(DstReg, Cmp.getReg(0));
3245 }
3246 RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
3247 I.eraseFromParent();
3248 return true;
3249 }
3250
emitScalarToVector(unsigned EltSize,const TargetRegisterClass * DstRC,Register Scalar,MachineIRBuilder & MIRBuilder) const3251 MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3252 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3253 MachineIRBuilder &MIRBuilder) const {
3254 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3255
3256 auto BuildFn = [&](unsigned SubregIndex) {
3257 auto Ins =
3258 MIRBuilder
3259 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3260 .addImm(SubregIndex);
3261 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3262 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3263 return &*Ins;
3264 };
3265
3266 switch (EltSize) {
3267 case 16:
3268 return BuildFn(AArch64::hsub);
3269 case 32:
3270 return BuildFn(AArch64::ssub);
3271 case 64:
3272 return BuildFn(AArch64::dsub);
3273 default:
3274 return nullptr;
3275 }
3276 }
3277
selectMergeValues(MachineInstr & I,MachineRegisterInfo & MRI) const3278 bool AArch64InstructionSelector::selectMergeValues(
3279 MachineInstr &I, MachineRegisterInfo &MRI) const {
3280 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3281 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3282 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3283 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3284 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3285
3286 if (I.getNumOperands() != 3)
3287 return false;
3288
3289 // Merging 2 s64s into an s128.
3290 if (DstTy == LLT::scalar(128)) {
3291 if (SrcTy.getSizeInBits() != 64)
3292 return false;
3293 MachineIRBuilder MIB(I);
3294 Register DstReg = I.getOperand(0).getReg();
3295 Register Src1Reg = I.getOperand(1).getReg();
3296 Register Src2Reg = I.getOperand(2).getReg();
3297 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3298 MachineInstr *InsMI =
3299 emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
3300 if (!InsMI)
3301 return false;
3302 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3303 Src2Reg, /* LaneIdx */ 1, RB, MIB);
3304 if (!Ins2MI)
3305 return false;
3306 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3307 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
3308 I.eraseFromParent();
3309 return true;
3310 }
3311
3312 if (RB.getID() != AArch64::GPRRegBankID)
3313 return false;
3314
3315 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3316 return false;
3317
3318 auto *DstRC = &AArch64::GPR64RegClass;
3319 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3320 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3321 TII.get(TargetOpcode::SUBREG_TO_REG))
3322 .addDef(SubToRegDef)
3323 .addImm(0)
3324 .addUse(I.getOperand(1).getReg())
3325 .addImm(AArch64::sub_32);
3326 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
3327 // Need to anyext the second scalar before we can use bfm
3328 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3329 TII.get(TargetOpcode::SUBREG_TO_REG))
3330 .addDef(SubToRegDef2)
3331 .addImm(0)
3332 .addUse(I.getOperand(2).getReg())
3333 .addImm(AArch64::sub_32);
3334 MachineInstr &BFM =
3335 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3336 .addDef(I.getOperand(0).getReg())
3337 .addUse(SubToRegDef)
3338 .addUse(SubToRegDef2)
3339 .addImm(32)
3340 .addImm(31);
3341 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3342 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3343 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
3344 I.eraseFromParent();
3345 return true;
3346 }
3347
getLaneCopyOpcode(unsigned & CopyOpc,unsigned & ExtractSubReg,const unsigned EltSize)3348 static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3349 const unsigned EltSize) {
3350 // Choose a lane copy opcode and subregister based off of the size of the
3351 // vector's elements.
3352 switch (EltSize) {
3353 case 16:
3354 CopyOpc = AArch64::CPYi16;
3355 ExtractSubReg = AArch64::hsub;
3356 break;
3357 case 32:
3358 CopyOpc = AArch64::CPYi32;
3359 ExtractSubReg = AArch64::ssub;
3360 break;
3361 case 64:
3362 CopyOpc = AArch64::CPYi64;
3363 ExtractSubReg = AArch64::dsub;
3364 break;
3365 default:
3366 // Unknown size, bail out.
3367 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
3368 return false;
3369 }
3370 return true;
3371 }
3372
emitExtractVectorElt(Optional<Register> DstReg,const RegisterBank & DstRB,LLT ScalarTy,Register VecReg,unsigned LaneIdx,MachineIRBuilder & MIRBuilder) const3373 MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3374 Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
3375 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3376 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3377 unsigned CopyOpc = 0;
3378 unsigned ExtractSubReg = 0;
3379 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
3380 LLVM_DEBUG(
3381 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
3382 return nullptr;
3383 }
3384
3385 const TargetRegisterClass *DstRC =
3386 getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
3387 if (!DstRC) {
3388 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
3389 return nullptr;
3390 }
3391
3392 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
3393 const LLT &VecTy = MRI.getType(VecReg);
3394 const TargetRegisterClass *VecRC =
3395 getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
3396 if (!VecRC) {
3397 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3398 return nullptr;
3399 }
3400
3401 // The register that we're going to copy into.
3402 Register InsertReg = VecReg;
3403 if (!DstReg)
3404 DstReg = MRI.createVirtualRegister(DstRC);
3405 // If the lane index is 0, we just use a subregister COPY.
3406 if (LaneIdx == 0) {
3407 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
3408 .addReg(VecReg, 0, ExtractSubReg);
3409 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3410 return &*Copy;
3411 }
3412
3413 // Lane copies require 128-bit wide registers. If we're dealing with an
3414 // unpacked vector, then we need to move up to that width. Insert an implicit
3415 // def and a subregister insert to get us there.
3416 if (VecTy.getSizeInBits() != 128) {
3417 MachineInstr *ScalarToVector = emitScalarToVector(
3418 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
3419 if (!ScalarToVector)
3420 return nullptr;
3421 InsertReg = ScalarToVector->getOperand(0).getReg();
3422 }
3423
3424 MachineInstr *LaneCopyMI =
3425 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
3426 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
3427
3428 // Make sure that we actually constrain the initial copy.
3429 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3430 return LaneCopyMI;
3431 }
3432
selectExtractElt(MachineInstr & I,MachineRegisterInfo & MRI) const3433 bool AArch64InstructionSelector::selectExtractElt(
3434 MachineInstr &I, MachineRegisterInfo &MRI) const {
3435 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
3436 "unexpected opcode!");
3437 Register DstReg = I.getOperand(0).getReg();
3438 const LLT NarrowTy = MRI.getType(DstReg);
3439 const Register SrcReg = I.getOperand(1).getReg();
3440 const LLT WideTy = MRI.getType(SrcReg);
3441 (void)WideTy;
3442 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
3443 "source register size too small!");
3444 assert(NarrowTy.isScalar() && "cannot extract vector into vector!");
3445
3446 // Need the lane index to determine the correct copy opcode.
3447 MachineOperand &LaneIdxOp = I.getOperand(2);
3448 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
3449
3450 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
3451 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
3452 return false;
3453 }
3454
3455 // Find the index to extract from.
3456 auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
3457 if (!VRegAndVal)
3458 return false;
3459 unsigned LaneIdx = VRegAndVal->Value;
3460
3461 MachineIRBuilder MIRBuilder(I);
3462
3463 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3464 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
3465 LaneIdx, MIRBuilder);
3466 if (!Extract)
3467 return false;
3468
3469 I.eraseFromParent();
3470 return true;
3471 }
3472
selectSplitVectorUnmerge(MachineInstr & I,MachineRegisterInfo & MRI) const3473 bool AArch64InstructionSelector::selectSplitVectorUnmerge(
3474 MachineInstr &I, MachineRegisterInfo &MRI) const {
3475 unsigned NumElts = I.getNumOperands() - 1;
3476 Register SrcReg = I.getOperand(NumElts).getReg();
3477 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
3478 const LLT SrcTy = MRI.getType(SrcReg);
3479
3480 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
3481 if (SrcTy.getSizeInBits() > 128) {
3482 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
3483 return false;
3484 }
3485
3486 MachineIRBuilder MIB(I);
3487
3488 // We implement a split vector operation by treating the sub-vectors as
3489 // scalars and extracting them.
3490 const RegisterBank &DstRB =
3491 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
3492 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
3493 Register Dst = I.getOperand(OpIdx).getReg();
3494 MachineInstr *Extract =
3495 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
3496 if (!Extract)
3497 return false;
3498 }
3499 I.eraseFromParent();
3500 return true;
3501 }
3502
selectUnmergeValues(MachineInstr & I,MachineRegisterInfo & MRI) const3503 bool AArch64InstructionSelector::selectUnmergeValues(
3504 MachineInstr &I, MachineRegisterInfo &MRI) const {
3505 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
3506 "unexpected opcode");
3507
3508 // TODO: Handle unmerging into GPRs and from scalars to scalars.
3509 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
3510 AArch64::FPRRegBankID ||
3511 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3512 AArch64::FPRRegBankID) {
3513 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
3514 "currently unsupported.\n");
3515 return false;
3516 }
3517
3518 // The last operand is the vector source register, and every other operand is
3519 // a register to unpack into.
3520 unsigned NumElts = I.getNumOperands() - 1;
3521 Register SrcReg = I.getOperand(NumElts).getReg();
3522 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
3523 const LLT WideTy = MRI.getType(SrcReg);
3524 (void)WideTy;
3525 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
3526 "can only unmerge from vector or s128 types!");
3527 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
3528 "source register size too small!");
3529
3530 if (!NarrowTy.isScalar())
3531 return selectSplitVectorUnmerge(I, MRI);
3532
3533 MachineIRBuilder MIB(I);
3534
3535 // Choose a lane copy opcode and subregister based off of the size of the
3536 // vector's elements.
3537 unsigned CopyOpc = 0;
3538 unsigned ExtractSubReg = 0;
3539 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
3540 return false;
3541
3542 // Set up for the lane copies.
3543 MachineBasicBlock &MBB = *I.getParent();
3544
3545 // Stores the registers we'll be copying from.
3546 SmallVector<Register, 4> InsertRegs;
3547
3548 // We'll use the first register twice, so we only need NumElts-1 registers.
3549 unsigned NumInsertRegs = NumElts - 1;
3550
3551 // If our elements fit into exactly 128 bits, then we can copy from the source
3552 // directly. Otherwise, we need to do a bit of setup with some subregister
3553 // inserts.
3554 if (NarrowTy.getSizeInBits() * NumElts == 128) {
3555 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
3556 } else {
3557 // No. We have to perform subregister inserts. For each insert, create an
3558 // implicit def and a subregister insert, and save the register we create.
3559 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
3560 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
3561 MachineInstr &ImpDefMI =
3562 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
3563 ImpDefReg);
3564
3565 // Now, create the subregister insert from SrcReg.
3566 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
3567 MachineInstr &InsMI =
3568 *BuildMI(MBB, I, I.getDebugLoc(),
3569 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
3570 .addUse(ImpDefReg)
3571 .addUse(SrcReg)
3572 .addImm(AArch64::dsub);
3573
3574 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
3575 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
3576
3577 // Save the register so that we can copy from it after.
3578 InsertRegs.push_back(InsertReg);
3579 }
3580 }
3581
3582 // Now that we've created any necessary subregister inserts, we can
3583 // create the copies.
3584 //
3585 // Perform the first copy separately as a subregister copy.
3586 Register CopyTo = I.getOperand(0).getReg();
3587 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
3588 .addReg(InsertRegs[0], 0, ExtractSubReg);
3589 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
3590
3591 // Now, perform the remaining copies as vector lane copies.
3592 unsigned LaneIdx = 1;
3593 for (Register InsReg : InsertRegs) {
3594 Register CopyTo = I.getOperand(LaneIdx).getReg();
3595 MachineInstr &CopyInst =
3596 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
3597 .addUse(InsReg)
3598 .addImm(LaneIdx);
3599 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
3600 ++LaneIdx;
3601 }
3602
3603 // Separately constrain the first copy's destination. Because of the
3604 // limitation in constrainOperandRegClass, we can't guarantee that this will
3605 // actually be constrained. So, do it ourselves using the second operand.
3606 const TargetRegisterClass *RC =
3607 MRI.getRegClassOrNull(I.getOperand(1).getReg());
3608 if (!RC) {
3609 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
3610 return false;
3611 }
3612
3613 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
3614 I.eraseFromParent();
3615 return true;
3616 }
3617
selectConcatVectors(MachineInstr & I,MachineRegisterInfo & MRI) const3618 bool AArch64InstructionSelector::selectConcatVectors(
3619 MachineInstr &I, MachineRegisterInfo &MRI) const {
3620 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
3621 "Unexpected opcode");
3622 Register Dst = I.getOperand(0).getReg();
3623 Register Op1 = I.getOperand(1).getReg();
3624 Register Op2 = I.getOperand(2).getReg();
3625 MachineIRBuilder MIRBuilder(I);
3626 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
3627 if (!ConcatMI)
3628 return false;
3629 I.eraseFromParent();
3630 return true;
3631 }
3632
3633 unsigned
emitConstantPoolEntry(const Constant * CPVal,MachineFunction & MF) const3634 AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
3635 MachineFunction &MF) const {
3636 Type *CPTy = CPVal->getType();
3637 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
3638
3639 MachineConstantPool *MCP = MF.getConstantPool();
3640 return MCP->getConstantPoolIndex(CPVal, Alignment);
3641 }
3642
emitLoadFromConstantPool(const Constant * CPVal,MachineIRBuilder & MIRBuilder) const3643 MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
3644 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
3645 unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());
3646
3647 auto Adrp =
3648 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
3649 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
3650
3651 MachineInstr *LoadMI = nullptr;
3652 switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
3653 case 16:
3654 LoadMI =
3655 &*MIRBuilder
3656 .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
3657 .addConstantPoolIndex(CPIdx, 0,
3658 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3659 break;
3660 case 8:
3661 LoadMI = &*MIRBuilder
3662 .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
3663 .addConstantPoolIndex(
3664 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3665 break;
3666 default:
3667 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
3668 << *CPVal->getType());
3669 return nullptr;
3670 }
3671 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
3672 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
3673 return LoadMI;
3674 }
3675
3676 /// Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given
3677 /// size and RB.
3678 static std::pair<unsigned, unsigned>
getInsertVecEltOpInfo(const RegisterBank & RB,unsigned EltSize)3679 getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
3680 unsigned Opc, SubregIdx;
3681 if (RB.getID() == AArch64::GPRRegBankID) {
3682 if (EltSize == 32) {
3683 Opc = AArch64::INSvi32gpr;
3684 SubregIdx = AArch64::ssub;
3685 } else if (EltSize == 64) {
3686 Opc = AArch64::INSvi64gpr;
3687 SubregIdx = AArch64::dsub;
3688 } else {
3689 llvm_unreachable("invalid elt size!");
3690 }
3691 } else {
3692 if (EltSize == 8) {
3693 Opc = AArch64::INSvi8lane;
3694 SubregIdx = AArch64::bsub;
3695 } else if (EltSize == 16) {
3696 Opc = AArch64::INSvi16lane;
3697 SubregIdx = AArch64::hsub;
3698 } else if (EltSize == 32) {
3699 Opc = AArch64::INSvi32lane;
3700 SubregIdx = AArch64::ssub;
3701 } else if (EltSize == 64) {
3702 Opc = AArch64::INSvi64lane;
3703 SubregIdx = AArch64::dsub;
3704 } else {
3705 llvm_unreachable("invalid elt size!");
3706 }
3707 }
3708 return std::make_pair(Opc, SubregIdx);
3709 }
3710
3711 MachineInstr *
emitADD(Register DefReg,MachineOperand & LHS,MachineOperand & RHS,MachineIRBuilder & MIRBuilder) const3712 AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
3713 MachineOperand &RHS,
3714 MachineIRBuilder &MIRBuilder) const {
3715 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
3716 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3717 static const unsigned OpcTable[2][2]{{AArch64::ADDXrr, AArch64::ADDXri},
3718 {AArch64::ADDWrr, AArch64::ADDWri}};
3719 bool Is32Bit = MRI.getType(LHS.getReg()).getSizeInBits() == 32;
3720 auto ImmFns = selectArithImmed(RHS);
3721 unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
3722 auto AddMI = MIRBuilder.buildInstr(Opc, {DefReg}, {LHS});
3723
3724 // If we matched a valid constant immediate, add those operands.
3725 if (ImmFns) {
3726 for (auto &RenderFn : *ImmFns)
3727 RenderFn(AddMI);
3728 } else {
3729 AddMI.addUse(RHS.getReg());
3730 }
3731
3732 constrainSelectedInstRegOperands(*AddMI, TII, TRI, RBI);
3733 return &*AddMI;
3734 }
3735
3736 MachineInstr *
emitCMN(MachineOperand & LHS,MachineOperand & RHS,MachineIRBuilder & MIRBuilder) const3737 AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
3738 MachineIRBuilder &MIRBuilder) const {
3739 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
3740 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3741 static const unsigned OpcTable[2][2]{{AArch64::ADDSXrr, AArch64::ADDSXri},
3742 {AArch64::ADDSWrr, AArch64::ADDSWri}};
3743 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
3744 auto ImmFns = selectArithImmed(RHS);
3745 unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
3746 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
3747
3748 auto CmpMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS});
3749
3750 // If we matched a valid constant immediate, add those operands.
3751 if (ImmFns) {
3752 for (auto &RenderFn : *ImmFns)
3753 RenderFn(CmpMI);
3754 } else {
3755 CmpMI.addUse(RHS.getReg());
3756 }
3757
3758 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
3759 return &*CmpMI;
3760 }
3761
3762 MachineInstr *
emitTST(const Register & LHS,const Register & RHS,MachineIRBuilder & MIRBuilder) const3763 AArch64InstructionSelector::emitTST(const Register &LHS, const Register &RHS,
3764 MachineIRBuilder &MIRBuilder) const {
3765 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3766 unsigned RegSize = MRI.getType(LHS).getSizeInBits();
3767 bool Is32Bit = (RegSize == 32);
3768 static const unsigned OpcTable[2][2]{{AArch64::ANDSXrr, AArch64::ANDSXri},
3769 {AArch64::ANDSWrr, AArch64::ANDSWri}};
3770 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
3771
3772 // We might be able to fold in an immediate into the TST. We need to make sure
3773 // it's a logical immediate though, since ANDS requires that.
3774 auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI);
3775 bool IsImmForm = ValAndVReg.hasValue() &&
3776 AArch64_AM::isLogicalImmediate(ValAndVReg->Value, RegSize);
3777 unsigned Opc = OpcTable[Is32Bit][IsImmForm];
3778 auto TstMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS});
3779
3780 if (IsImmForm)
3781 TstMI.addImm(
3782 AArch64_AM::encodeLogicalImmediate(ValAndVReg->Value, RegSize));
3783 else
3784 TstMI.addUse(RHS);
3785
3786 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3787 return &*TstMI;
3788 }
3789
3790 std::pair<MachineInstr *, CmpInst::Predicate>
emitIntegerCompare(MachineOperand & LHS,MachineOperand & RHS,MachineOperand & Predicate,MachineIRBuilder & MIRBuilder) const3791 AArch64InstructionSelector::emitIntegerCompare(
3792 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
3793 MachineIRBuilder &MIRBuilder) const {
3794 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
3795 assert(Predicate.isPredicate() && "Expected predicate?");
3796 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3797
3798 CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
3799
3800 // Fold the compare if possible.
3801 MachineInstr *FoldCmp =
3802 tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder);
3803 if (FoldCmp)
3804 return {FoldCmp, P};
3805
3806 // Can't fold into a CMN. Just emit a normal compare.
3807 unsigned CmpOpc = 0;
3808 Register ZReg;
3809
3810 LLT CmpTy = MRI.getType(LHS.getReg());
3811 assert((CmpTy.isScalar() || CmpTy.isPointer()) &&
3812 "Expected scalar or pointer");
3813 if (CmpTy == LLT::scalar(32)) {
3814 CmpOpc = AArch64::SUBSWrr;
3815 ZReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3816 } else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) {
3817 CmpOpc = AArch64::SUBSXrr;
3818 ZReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3819 } else {
3820 return {nullptr, CmpInst::Predicate::BAD_ICMP_PREDICATE};
3821 }
3822
3823 // Try to match immediate forms.
3824 MachineInstr *ImmedCmp =
3825 tryOptArithImmedIntegerCompare(LHS, RHS, P, MIRBuilder);
3826 if (ImmedCmp)
3827 return {ImmedCmp, P};
3828
3829 // If we don't have an immediate, we may have a shift which can be folded
3830 // into the compare.
3831 MachineInstr *ShiftedCmp = tryOptArithShiftedCompare(LHS, RHS, MIRBuilder);
3832 if (ShiftedCmp)
3833 return {ShiftedCmp, P};
3834
3835 auto CmpMI =
3836 MIRBuilder.buildInstr(CmpOpc, {ZReg}, {LHS.getReg(), RHS.getReg()});
3837 // Make sure that we can constrain the compare that we emitted.
3838 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
3839 return {&*CmpMI, P};
3840 }
3841
emitVectorConcat(Optional<Register> Dst,Register Op1,Register Op2,MachineIRBuilder & MIRBuilder) const3842 MachineInstr *AArch64InstructionSelector::emitVectorConcat(
3843 Optional<Register> Dst, Register Op1, Register Op2,
3844 MachineIRBuilder &MIRBuilder) const {
3845 // We implement a vector concat by:
3846 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
3847 // 2. Insert the upper vector into the destination's upper element
3848 // TODO: some of this code is common with G_BUILD_VECTOR handling.
3849 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3850
3851 const LLT Op1Ty = MRI.getType(Op1);
3852 const LLT Op2Ty = MRI.getType(Op2);
3853
3854 if (Op1Ty != Op2Ty) {
3855 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
3856 return nullptr;
3857 }
3858 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
3859
3860 if (Op1Ty.getSizeInBits() >= 128) {
3861 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
3862 return nullptr;
3863 }
3864
3865 // At the moment we just support 64 bit vector concats.
3866 if (Op1Ty.getSizeInBits() != 64) {
3867 LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors");
3868 return nullptr;
3869 }
3870
3871 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
3872 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
3873 const TargetRegisterClass *DstRC =
3874 getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
3875
3876 MachineInstr *WidenedOp1 =
3877 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
3878 MachineInstr *WidenedOp2 =
3879 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
3880 if (!WidenedOp1 || !WidenedOp2) {
3881 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
3882 return nullptr;
3883 }
3884
3885 // Now do the insert of the upper element.
3886 unsigned InsertOpc, InsSubRegIdx;
3887 std::tie(InsertOpc, InsSubRegIdx) =
3888 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
3889
3890 if (!Dst)
3891 Dst = MRI.createVirtualRegister(DstRC);
3892 auto InsElt =
3893 MIRBuilder
3894 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
3895 .addImm(1) /* Lane index */
3896 .addUse(WidenedOp2->getOperand(0).getReg())
3897 .addImm(0);
3898 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
3899 return &*InsElt;
3900 }
3901
emitFMovForFConstant(MachineInstr & I,MachineRegisterInfo & MRI) const3902 MachineInstr *AArch64InstructionSelector::emitFMovForFConstant(
3903 MachineInstr &I, MachineRegisterInfo &MRI) const {
3904 assert(I.getOpcode() == TargetOpcode::G_FCONSTANT &&
3905 "Expected a G_FCONSTANT!");
3906 MachineOperand &ImmOp = I.getOperand(1);
3907 unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();
3908
3909 // Only handle 32 and 64 bit defs for now.
3910 if (DefSize != 32 && DefSize != 64)
3911 return nullptr;
3912
3913 // Don't handle null values using FMOV.
3914 if (ImmOp.getFPImm()->isNullValue())
3915 return nullptr;
3916
3917 // Get the immediate representation for the FMOV.
3918 const APFloat &ImmValAPF = ImmOp.getFPImm()->getValueAPF();
3919 int Imm = DefSize == 32 ? AArch64_AM::getFP32Imm(ImmValAPF)
3920 : AArch64_AM::getFP64Imm(ImmValAPF);
3921
3922 // If this is -1, it means the immediate can't be represented as the requested
3923 // floating point value. Bail.
3924 if (Imm == -1)
3925 return nullptr;
3926
3927 // Update MI to represent the new FMOV instruction, constrain it, and return.
3928 ImmOp.ChangeToImmediate(Imm);
3929 unsigned MovOpc = DefSize == 32 ? AArch64::FMOVSi : AArch64::FMOVDi;
3930 I.setDesc(TII.get(MovOpc));
3931 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3932 return &I;
3933 }
3934
3935 MachineInstr *
emitCSetForICMP(Register DefReg,unsigned Pred,MachineIRBuilder & MIRBuilder) const3936 AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
3937 MachineIRBuilder &MIRBuilder) const {
3938 // CSINC increments the result when the predicate is false. Invert it.
3939 const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
3940 CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
3941 auto I =
3942 MIRBuilder
3943 .buildInstr(AArch64::CSINCWr, {DefReg}, {Register(AArch64::WZR), Register(AArch64::WZR)})
3944 .addImm(InvCC);
3945 constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
3946 return &*I;
3947 }
3948
tryOptSelect(MachineInstr & I) const3949 bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
3950 MachineIRBuilder MIB(I);
3951 MachineRegisterInfo &MRI = *MIB.getMRI();
3952 const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
3953
3954 // We want to recognize this pattern:
3955 //
3956 // $z = G_FCMP pred, $x, $y
3957 // ...
3958 // $w = G_SELECT $z, $a, $b
3959 //
3960 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
3961 // some copies/truncs in between.)
3962 //
3963 // If we see this, then we can emit something like this:
3964 //
3965 // fcmp $x, $y
3966 // fcsel $w, $a, $b, pred
3967 //
3968 // Rather than emitting both of the rather long sequences in the standard
3969 // G_FCMP/G_SELECT select methods.
3970
3971 // First, check if the condition is defined by a compare.
3972 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
3973 while (CondDef) {
3974 // We can only fold if all of the defs have one use.
3975 Register CondDefReg = CondDef->getOperand(0).getReg();
3976 if (!MRI.hasOneNonDBGUse(CondDefReg)) {
3977 // Unless it's another select.
3978 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
3979 if (CondDef == &UI)
3980 continue;
3981 if (UI.getOpcode() != TargetOpcode::G_SELECT)
3982 return false;
3983 }
3984 }
3985
3986 // We can skip over G_TRUNC since the condition is 1-bit.
3987 // Truncating/extending can have no impact on the value.
3988 unsigned Opc = CondDef->getOpcode();
3989 if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
3990 break;
3991
3992 // Can't see past copies from physregs.
3993 if (Opc == TargetOpcode::COPY &&
3994 Register::isPhysicalRegister(CondDef->getOperand(1).getReg()))
3995 return false;
3996
3997 CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
3998 }
3999
4000 // Is the condition defined by a compare?
4001 if (!CondDef)
4002 return false;
4003
4004 unsigned CondOpc = CondDef->getOpcode();
4005 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
4006 return false;
4007
4008 AArch64CC::CondCode CondCode;
4009 if (CondOpc == TargetOpcode::G_ICMP) {
4010 MachineInstr *Cmp;
4011 CmpInst::Predicate Pred;
4012
4013 std::tie(Cmp, Pred) =
4014 emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
4015 CondDef->getOperand(1), MIB);
4016
4017 if (!Cmp) {
4018 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
4019 return false;
4020 }
4021
4022 // Have to collect the CondCode after emitIntegerCompare, since it can
4023 // update the predicate.
4024 CondCode = changeICMPPredToAArch64CC(Pred);
4025 } else {
4026 // Get the condition code for the select.
4027 AArch64CC::CondCode CondCode2;
4028 changeFCMPPredToAArch64CC(
4029 (CmpInst::Predicate)CondDef->getOperand(1).getPredicate(), CondCode,
4030 CondCode2);
4031
4032 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
4033 // instructions to emit the comparison.
4034 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
4035 // unnecessary.
4036 if (CondCode2 != AArch64CC::AL)
4037 return false;
4038
4039 // Make sure we'll be able to select the compare.
4040 unsigned CmpOpc = selectFCMPOpc(*CondDef, MRI);
4041 if (!CmpOpc)
4042 return false;
4043
4044 // Emit a new compare.
4045 auto Cmp = MIB.buildInstr(CmpOpc, {}, {CondDef->getOperand(2).getReg()});
4046 if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
4047 Cmp.addUse(CondDef->getOperand(3).getReg());
4048 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
4049 }
4050
4051 // Emit the select.
4052 unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
4053 auto CSel =
4054 MIB.buildInstr(CSelOpc, {I.getOperand(0).getReg()},
4055 {I.getOperand(2).getReg(), I.getOperand(3).getReg()})
4056 .addImm(CondCode);
4057 constrainSelectedInstRegOperands(*CSel, TII, TRI, RBI);
4058 I.eraseFromParent();
4059 return true;
4060 }
4061
tryFoldIntegerCompare(MachineOperand & LHS,MachineOperand & RHS,MachineOperand & Predicate,MachineIRBuilder & MIRBuilder) const4062 MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
4063 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4064 MachineIRBuilder &MIRBuilder) const {
4065 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
4066 "Unexpected MachineOperand");
4067 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4068 // We want to find this sort of thing:
4069 // x = G_SUB 0, y
4070 // G_ICMP z, x
4071 //
4072 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
4073 // e.g:
4074 //
4075 // cmn z, y
4076
4077 // Helper lambda to detect the subtract followed by the compare.
4078 // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0.
4079 auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) {
4080 if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB)
4081 return false;
4082
4083 // Need to make sure NZCV is the same at the end of the transformation.
4084 if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
4085 return false;
4086
4087 // We want to match against SUBs.
4088 if (DefMI->getOpcode() != TargetOpcode::G_SUB)
4089 return false;
4090
4091 // Make sure that we're getting
4092 // x = G_SUB 0, y
4093 auto ValAndVReg =
4094 getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI);
4095 if (!ValAndVReg || ValAndVReg->Value != 0)
4096 return false;
4097
4098 // This can safely be represented as a CMN.
4099 return true;
4100 };
4101
4102 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
4103 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
4104 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
4105 CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
4106 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(P);
4107
4108 // Given this:
4109 //
4110 // x = G_SUB 0, y
4111 // G_ICMP x, z
4112 //
4113 // Produce this:
4114 //
4115 // cmn y, z
4116 if (IsCMN(LHSDef, CC))
4117 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
4118
4119 // Same idea here, but with the RHS of the compare instead:
4120 //
4121 // Given this:
4122 //
4123 // x = G_SUB 0, y
4124 // G_ICMP z, x
4125 //
4126 // Produce this:
4127 //
4128 // cmn z, y
4129 if (IsCMN(RHSDef, CC))
4130 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
4131
4132 // Given this:
4133 //
4134 // z = G_AND x, y
4135 // G_ICMP z, 0
4136 //
4137 // Produce this if the compare is signed:
4138 //
4139 // tst x, y
4140 if (!isUnsignedICMPPred(P) && LHSDef &&
4141 LHSDef->getOpcode() == TargetOpcode::G_AND) {
4142 // Make sure that the RHS is 0.
4143 auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
4144 if (!ValAndVReg || ValAndVReg->Value != 0)
4145 return nullptr;
4146
4147 return emitTST(LHSDef->getOperand(1).getReg(),
4148 LHSDef->getOperand(2).getReg(), MIRBuilder);
4149 }
4150
4151 return nullptr;
4152 }
4153
tryOptArithImmedIntegerCompare(MachineOperand & LHS,MachineOperand & RHS,CmpInst::Predicate & P,MachineIRBuilder & MIB) const4154 MachineInstr *AArch64InstructionSelector::tryOptArithImmedIntegerCompare(
4155 MachineOperand &LHS, MachineOperand &RHS, CmpInst::Predicate &P,
4156 MachineIRBuilder &MIB) const {
4157 // Attempt to select the immediate form of an integer compare.
4158 MachineRegisterInfo &MRI = *MIB.getMRI();
4159 auto Ty = MRI.getType(LHS.getReg());
4160 assert(!Ty.isVector() && "Expected scalar or pointer only?");
4161 unsigned Size = Ty.getSizeInBits();
4162 assert((Size == 32 || Size == 64) &&
4163 "Expected 32 bit or 64 bit compare only?");
4164
4165 // Check if this is a case we can already handle.
4166 InstructionSelector::ComplexRendererFns ImmFns;
4167 ImmFns = selectArithImmed(RHS);
4168
4169 if (!ImmFns) {
4170 // We didn't get a rendering function, but we may still have a constant.
4171 auto MaybeImmed = getImmedFromMO(RHS);
4172 if (!MaybeImmed)
4173 return nullptr;
4174
4175 // We have a constant, but it doesn't fit. Try adjusting it by one and
4176 // updating the predicate if possible.
4177 uint64_t C = *MaybeImmed;
4178 CmpInst::Predicate NewP;
4179 switch (P) {
4180 default:
4181 return nullptr;
4182 case CmpInst::ICMP_SLT:
4183 case CmpInst::ICMP_SGE:
4184 // Check for
4185 //
4186 // x slt c => x sle c - 1
4187 // x sge c => x sgt c - 1
4188 //
4189 // When c is not the smallest possible negative number.
4190 if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) ||
4191 (Size == 32 && static_cast<int32_t>(C) == INT32_MIN))
4192 return nullptr;
4193 NewP = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
4194 C -= 1;
4195 break;
4196 case CmpInst::ICMP_ULT:
4197 case CmpInst::ICMP_UGE:
4198 // Check for
4199 //
4200 // x ult c => x ule c - 1
4201 // x uge c => x ugt c - 1
4202 //
4203 // When c is not zero.
4204 if (C == 0)
4205 return nullptr;
4206 NewP = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
4207 C -= 1;
4208 break;
4209 case CmpInst::ICMP_SLE:
4210 case CmpInst::ICMP_SGT:
4211 // Check for
4212 //
4213 // x sle c => x slt c + 1
4214 // x sgt c => s sge c + 1
4215 //
4216 // When c is not the largest possible signed integer.
4217 if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) ||
4218 (Size == 64 && static_cast<int64_t>(C) == INT64_MAX))
4219 return nullptr;
4220 NewP = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
4221 C += 1;
4222 break;
4223 case CmpInst::ICMP_ULE:
4224 case CmpInst::ICMP_UGT:
4225 // Check for
4226 //
4227 // x ule c => x ult c + 1
4228 // x ugt c => s uge c + 1
4229 //
4230 // When c is not the largest possible unsigned integer.
4231 if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) ||
4232 (Size == 64 && C == UINT64_MAX))
4233 return nullptr;
4234 NewP = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
4235 C += 1;
4236 break;
4237 }
4238
4239 // Check if the new constant is valid.
4240 if (Size == 32)
4241 C = static_cast<uint32_t>(C);
4242 ImmFns = select12BitValueWithLeftShift(C);
4243 if (!ImmFns)
4244 return nullptr;
4245 P = NewP;
4246 }
4247
4248 // At this point, we know we can select an immediate form. Go ahead and do
4249 // that.
4250 Register ZReg;
4251 unsigned Opc;
4252 if (Size == 32) {
4253 ZReg = AArch64::WZR;
4254 Opc = AArch64::SUBSWri;
4255 } else {
4256 ZReg = AArch64::XZR;
4257 Opc = AArch64::SUBSXri;
4258 }
4259
4260 auto CmpMI = MIB.buildInstr(Opc, {ZReg}, {LHS.getReg()});
4261 for (auto &RenderFn : *ImmFns)
4262 RenderFn(CmpMI);
4263 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4264 return &*CmpMI;
4265 }
4266
tryOptArithShiftedCompare(MachineOperand & LHS,MachineOperand & RHS,MachineIRBuilder & MIB) const4267 MachineInstr *AArch64InstructionSelector::tryOptArithShiftedCompare(
4268 MachineOperand &LHS, MachineOperand &RHS, MachineIRBuilder &MIB) const {
4269 // We are looking for the following pattern:
4270 //
4271 // shift = G_SHL/ASHR/LHSR y, c
4272 // ...
4273 // cmp = G_ICMP pred, something, shift
4274 //
4275 // Since we will select the G_ICMP to a SUBS, we can potentially fold the
4276 // shift into the subtract.
4277 static const unsigned OpcTable[2] = {AArch64::SUBSWrs, AArch64::SUBSXrs};
4278 static const Register ZRegTable[2] = {AArch64::WZR, AArch64::XZR};
4279 auto ImmFns = selectShiftedRegister(RHS);
4280 if (!ImmFns)
4281 return nullptr;
4282 MachineRegisterInfo &MRI = *MIB.getMRI();
4283 auto Ty = MRI.getType(LHS.getReg());
4284 assert(!Ty.isVector() && "Expected scalar or pointer only?");
4285 unsigned Size = Ty.getSizeInBits();
4286 bool Idx = (Size == 64);
4287 Register ZReg = ZRegTable[Idx];
4288 unsigned Opc = OpcTable[Idx];
4289 auto CmpMI = MIB.buildInstr(Opc, {ZReg}, {LHS.getReg()});
4290 for (auto &RenderFn : *ImmFns)
4291 RenderFn(CmpMI);
4292 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4293 return &*CmpMI;
4294 }
4295
selectShuffleVector(MachineInstr & I,MachineRegisterInfo & MRI) const4296 bool AArch64InstructionSelector::selectShuffleVector(
4297 MachineInstr &I, MachineRegisterInfo &MRI) const {
4298 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
4299 Register Src1Reg = I.getOperand(1).getReg();
4300 const LLT Src1Ty = MRI.getType(Src1Reg);
4301 Register Src2Reg = I.getOperand(2).getReg();
4302 const LLT Src2Ty = MRI.getType(Src2Reg);
4303 ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
4304
4305 MachineBasicBlock &MBB = *I.getParent();
4306 MachineFunction &MF = *MBB.getParent();
4307 LLVMContext &Ctx = MF.getFunction().getContext();
4308
4309 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
4310 // it's originated from a <1 x T> type. Those should have been lowered into
4311 // G_BUILD_VECTOR earlier.
4312 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
4313 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
4314 return false;
4315 }
4316
4317 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
4318
4319 SmallVector<Constant *, 64> CstIdxs;
4320 for (int Val : Mask) {
4321 // For now, any undef indexes we'll just assume to be 0. This should be
4322 // optimized in future, e.g. to select DUP etc.
4323 Val = Val < 0 ? 0 : Val;
4324 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
4325 unsigned Offset = Byte + Val * BytesPerElt;
4326 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
4327 }
4328 }
4329
4330 MachineIRBuilder MIRBuilder(I);
4331
4332 // Use a constant pool to load the index vector for TBL.
4333 Constant *CPVal = ConstantVector::get(CstIdxs);
4334 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
4335 if (!IndexLoad) {
4336 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
4337 return false;
4338 }
4339
4340 if (DstTy.getSizeInBits() != 128) {
4341 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
4342 // This case can be done with TBL1.
4343 MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
4344 if (!Concat) {
4345 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
4346 return false;
4347 }
4348
4349 // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
4350 IndexLoad =
4351 emitScalarToVector(64, &AArch64::FPR128RegClass,
4352 IndexLoad->getOperand(0).getReg(), MIRBuilder);
4353
4354 auto TBL1 = MIRBuilder.buildInstr(
4355 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
4356 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
4357 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
4358
4359 auto Copy =
4360 MIRBuilder
4361 .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
4362 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
4363 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
4364 I.eraseFromParent();
4365 return true;
4366 }
4367
4368 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
4369 // Q registers for regalloc.
4370 auto RegSeq = MIRBuilder
4371 .buildInstr(TargetOpcode::REG_SEQUENCE,
4372 {&AArch64::QQRegClass}, {Src1Reg})
4373 .addImm(AArch64::qsub0)
4374 .addUse(Src2Reg)
4375 .addImm(AArch64::qsub1);
4376
4377 auto TBL2 = MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
4378 {RegSeq, IndexLoad->getOperand(0)});
4379 constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
4380 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
4381 I.eraseFromParent();
4382 return true;
4383 }
4384
emitLaneInsert(Optional<Register> DstReg,Register SrcReg,Register EltReg,unsigned LaneIdx,const RegisterBank & RB,MachineIRBuilder & MIRBuilder) const4385 MachineInstr *AArch64InstructionSelector::emitLaneInsert(
4386 Optional<Register> DstReg, Register SrcReg, Register EltReg,
4387 unsigned LaneIdx, const RegisterBank &RB,
4388 MachineIRBuilder &MIRBuilder) const {
4389 MachineInstr *InsElt = nullptr;
4390 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
4391 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4392
4393 // Create a register to define with the insert if one wasn't passed in.
4394 if (!DstReg)
4395 DstReg = MRI.createVirtualRegister(DstRC);
4396
4397 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
4398 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
4399
4400 if (RB.getID() == AArch64::FPRRegBankID) {
4401 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
4402 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
4403 .addImm(LaneIdx)
4404 .addUse(InsSub->getOperand(0).getReg())
4405 .addImm(0);
4406 } else {
4407 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
4408 .addImm(LaneIdx)
4409 .addUse(EltReg);
4410 }
4411
4412 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4413 return InsElt;
4414 }
4415
selectInsertElt(MachineInstr & I,MachineRegisterInfo & MRI) const4416 bool AArch64InstructionSelector::selectInsertElt(
4417 MachineInstr &I, MachineRegisterInfo &MRI) const {
4418 assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
4419
4420 // Get information on the destination.
4421 Register DstReg = I.getOperand(0).getReg();
4422 const LLT DstTy = MRI.getType(DstReg);
4423 unsigned VecSize = DstTy.getSizeInBits();
4424
4425 // Get information on the element we want to insert into the destination.
4426 Register EltReg = I.getOperand(2).getReg();
4427 const LLT EltTy = MRI.getType(EltReg);
4428 unsigned EltSize = EltTy.getSizeInBits();
4429 if (EltSize < 16 || EltSize > 64)
4430 return false; // Don't support all element types yet.
4431
4432 // Find the definition of the index. Bail out if it's not defined by a
4433 // G_CONSTANT.
4434 Register IdxReg = I.getOperand(3).getReg();
4435 auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
4436 if (!VRegAndVal)
4437 return false;
4438 unsigned LaneIdx = VRegAndVal->Value;
4439
4440 // Perform the lane insert.
4441 Register SrcReg = I.getOperand(1).getReg();
4442 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
4443 MachineIRBuilder MIRBuilder(I);
4444
4445 if (VecSize < 128) {
4446 // If the vector we're inserting into is smaller than 128 bits, widen it
4447 // to 128 to do the insert.
4448 MachineInstr *ScalarToVec = emitScalarToVector(
4449 VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
4450 if (!ScalarToVec)
4451 return false;
4452 SrcReg = ScalarToVec->getOperand(0).getReg();
4453 }
4454
4455 // Create an insert into a new FPR128 register.
4456 // Note that if our vector is already 128 bits, we end up emitting an extra
4457 // register.
4458 MachineInstr *InsMI =
4459 emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);
4460
4461 if (VecSize < 128) {
4462 // If we had to widen to perform the insert, then we have to demote back to
4463 // the original size to get the result we want.
4464 Register DemoteVec = InsMI->getOperand(0).getReg();
4465 const TargetRegisterClass *RC =
4466 getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
4467 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
4468 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
4469 return false;
4470 }
4471 unsigned SubReg = 0;
4472 if (!getSubRegForClass(RC, TRI, SubReg))
4473 return false;
4474 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
4475 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
4476 << "\n");
4477 return false;
4478 }
4479 MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
4480 .addReg(DemoteVec, 0, SubReg);
4481 RBI.constrainGenericRegister(DstReg, *RC, MRI);
4482 } else {
4483 // No widening needed.
4484 InsMI->getOperand(0).setReg(DstReg);
4485 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
4486 }
4487
4488 I.eraseFromParent();
4489 return true;
4490 }
4491
tryOptConstantBuildVec(MachineInstr & I,LLT DstTy,MachineRegisterInfo & MRI) const4492 bool AArch64InstructionSelector::tryOptConstantBuildVec(
4493 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) const {
4494 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4495 assert(DstTy.getSizeInBits() <= 128 && "Unexpected build_vec type!");
4496 if (DstTy.getSizeInBits() < 32)
4497 return false;
4498 // Check if we're building a constant vector, in which case we want to
4499 // generate a constant pool load instead of a vector insert sequence.
4500 SmallVector<Constant *, 16> Csts;
4501 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
4502 // Try to find G_CONSTANT or G_FCONSTANT
4503 auto *OpMI =
4504 getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
4505 if (OpMI)
4506 Csts.emplace_back(
4507 const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
4508 else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
4509 I.getOperand(Idx).getReg(), MRI)))
4510 Csts.emplace_back(
4511 const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
4512 else
4513 return false;
4514 }
4515 Constant *CV = ConstantVector::get(Csts);
4516 MachineIRBuilder MIB(I);
4517 auto *CPLoad = emitLoadFromConstantPool(CV, MIB);
4518 if (!CPLoad) {
4519 LLVM_DEBUG(dbgs() << "Could not generate cp load for build_vector");
4520 return false;
4521 }
4522 MIB.buildCopy(I.getOperand(0), CPLoad->getOperand(0));
4523 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
4524 *MRI.getRegClass(CPLoad->getOperand(0).getReg()),
4525 MRI);
4526 I.eraseFromParent();
4527 return true;
4528 }
4529
selectBuildVector(MachineInstr & I,MachineRegisterInfo & MRI) const4530 bool AArch64InstructionSelector::selectBuildVector(
4531 MachineInstr &I, MachineRegisterInfo &MRI) const {
4532 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4533 // Until we port more of the optimized selections, for now just use a vector
4534 // insert sequence.
4535 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
4536 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
4537 unsigned EltSize = EltTy.getSizeInBits();
4538
4539 if (tryOptConstantBuildVec(I, DstTy, MRI))
4540 return true;
4541 if (EltSize < 16 || EltSize > 64)
4542 return false; // Don't support all element types yet.
4543 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
4544 MachineIRBuilder MIRBuilder(I);
4545
4546 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
4547 MachineInstr *ScalarToVec =
4548 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
4549 I.getOperand(1).getReg(), MIRBuilder);
4550 if (!ScalarToVec)
4551 return false;
4552
4553 Register DstVec = ScalarToVec->getOperand(0).getReg();
4554 unsigned DstSize = DstTy.getSizeInBits();
4555
4556 // Keep track of the last MI we inserted. Later on, we might be able to save
4557 // a copy using it.
4558 MachineInstr *PrevMI = nullptr;
4559 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
4560 // Note that if we don't do a subregister copy, we can end up making an
4561 // extra register.
4562 PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
4563 MIRBuilder);
4564 DstVec = PrevMI->getOperand(0).getReg();
4565 }
4566
4567 // If DstTy's size in bits is less than 128, then emit a subregister copy
4568 // from DstVec to the last register we've defined.
4569 if (DstSize < 128) {
4570 // Force this to be FPR using the destination vector.
4571 const TargetRegisterClass *RC =
4572 getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
4573 if (!RC)
4574 return false;
4575 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
4576 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
4577 return false;
4578 }
4579
4580 unsigned SubReg = 0;
4581 if (!getSubRegForClass(RC, TRI, SubReg))
4582 return false;
4583 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
4584 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
4585 << "\n");
4586 return false;
4587 }
4588
4589 Register Reg = MRI.createVirtualRegister(RC);
4590 Register DstReg = I.getOperand(0).getReg();
4591
4592 MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
4593 .addReg(DstVec, 0, SubReg);
4594 MachineOperand &RegOp = I.getOperand(1);
4595 RegOp.setReg(Reg);
4596 RBI.constrainGenericRegister(DstReg, *RC, MRI);
4597 } else {
4598 // We don't need a subregister copy. Save a copy by re-using the
4599 // destination register on the final insert.
4600 assert(PrevMI && "PrevMI was null?");
4601 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
4602 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
4603 }
4604
4605 I.eraseFromParent();
4606 return true;
4607 }
4608
4609 /// Helper function to find an intrinsic ID on an a MachineInstr. Returns the
4610 /// ID if it exists, and 0 otherwise.
findIntrinsicID(MachineInstr & I)4611 static unsigned findIntrinsicID(MachineInstr &I) {
4612 auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
4613 return Op.isIntrinsicID();
4614 });
4615 if (IntrinOp == I.operands_end())
4616 return 0;
4617 return IntrinOp->getIntrinsicID();
4618 }
4619
selectIntrinsicWithSideEffects(MachineInstr & I,MachineRegisterInfo & MRI) const4620 bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
4621 MachineInstr &I, MachineRegisterInfo &MRI) const {
4622 // Find the intrinsic ID.
4623 unsigned IntrinID = findIntrinsicID(I);
4624 if (!IntrinID)
4625 return false;
4626 MachineIRBuilder MIRBuilder(I);
4627
4628 // Select the instruction.
4629 switch (IntrinID) {
4630 default:
4631 return false;
4632 case Intrinsic::trap:
4633 MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
4634 break;
4635 case Intrinsic::debugtrap:
4636 if (!STI.isTargetWindows())
4637 return false;
4638 MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
4639 break;
4640 }
4641
4642 I.eraseFromParent();
4643 return true;
4644 }
4645
selectIntrinsic(MachineInstr & I,MachineRegisterInfo & MRI)4646 bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
4647 MachineRegisterInfo &MRI) {
4648 unsigned IntrinID = findIntrinsicID(I);
4649 if (!IntrinID)
4650 return false;
4651 MachineIRBuilder MIRBuilder(I);
4652
4653 switch (IntrinID) {
4654 default:
4655 break;
4656 case Intrinsic::aarch64_crypto_sha1h: {
4657 Register DstReg = I.getOperand(0).getReg();
4658 Register SrcReg = I.getOperand(2).getReg();
4659
4660 // FIXME: Should this be an assert?
4661 if (MRI.getType(DstReg).getSizeInBits() != 32 ||
4662 MRI.getType(SrcReg).getSizeInBits() != 32)
4663 return false;
4664
4665 // The operation has to happen on FPRs. Set up some new FPR registers for
4666 // the source and destination if they are on GPRs.
4667 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4668 SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
4669 MIRBuilder.buildCopy({SrcReg}, {I.getOperand(2)});
4670
4671 // Make sure the copy ends up getting constrained properly.
4672 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
4673 AArch64::GPR32RegClass, MRI);
4674 }
4675
4676 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
4677 DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
4678
4679 // Actually insert the instruction.
4680 auto SHA1Inst = MIRBuilder.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
4681 constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
4682
4683 // Did we create a new register for the destination?
4684 if (DstReg != I.getOperand(0).getReg()) {
4685 // Yep. Copy the result of the instruction back into the original
4686 // destination.
4687 MIRBuilder.buildCopy({I.getOperand(0)}, {DstReg});
4688 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
4689 AArch64::GPR32RegClass, MRI);
4690 }
4691
4692 I.eraseFromParent();
4693 return true;
4694 }
4695 case Intrinsic::frameaddress:
4696 case Intrinsic::returnaddress: {
4697 MachineFunction &MF = *I.getParent()->getParent();
4698 MachineFrameInfo &MFI = MF.getFrameInfo();
4699
4700 unsigned Depth = I.getOperand(2).getImm();
4701 Register DstReg = I.getOperand(0).getReg();
4702 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
4703
4704 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
4705 if (MFReturnAddr) {
4706 MIRBuilder.buildCopy({DstReg}, MFReturnAddr);
4707 I.eraseFromParent();
4708 return true;
4709 }
4710 MFI.setReturnAddressIsTaken(true);
4711 MF.addLiveIn(AArch64::LR, &AArch64::GPR64spRegClass);
4712 // Insert the copy from LR/X30 into the entry block, before it can be
4713 // clobbered by anything.
4714 MachineBasicBlock &EntryBlock = *MF.begin();
4715 if (!EntryBlock.isLiveIn(AArch64::LR))
4716 EntryBlock.addLiveIn(AArch64::LR);
4717 MachineIRBuilder EntryBuilder(MF);
4718 EntryBuilder.setInstr(*EntryBlock.begin());
4719 EntryBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
4720 MFReturnAddr = DstReg;
4721 I.eraseFromParent();
4722 return true;
4723 }
4724
4725 MFI.setFrameAddressIsTaken(true);
4726 Register FrameAddr(AArch64::FP);
4727 while (Depth--) {
4728 Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
4729 auto Ldr =
4730 MIRBuilder.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr})
4731 .addImm(0);
4732 constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);
4733 FrameAddr = NextFrame;
4734 }
4735
4736 if (IntrinID == Intrinsic::frameaddress)
4737 MIRBuilder.buildCopy({DstReg}, {FrameAddr});
4738 else {
4739 MFI.setReturnAddressIsTaken(true);
4740 MIRBuilder.buildInstr(AArch64::LDRXui, {DstReg}, {FrameAddr}).addImm(1);
4741 }
4742
4743 I.eraseFromParent();
4744 return true;
4745 }
4746 }
4747 return false;
4748 }
4749
4750 InstructionSelector::ComplexRendererFns
selectShiftA_32(const MachineOperand & Root) const4751 AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
4752 auto MaybeImmed = getImmedFromMO(Root);
4753 if (MaybeImmed == None || *MaybeImmed > 31)
4754 return None;
4755 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
4756 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4757 }
4758
4759 InstructionSelector::ComplexRendererFns
selectShiftB_32(const MachineOperand & Root) const4760 AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
4761 auto MaybeImmed = getImmedFromMO(Root);
4762 if (MaybeImmed == None || *MaybeImmed > 31)
4763 return None;
4764 uint64_t Enc = 31 - *MaybeImmed;
4765 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4766 }
4767
4768 InstructionSelector::ComplexRendererFns
selectShiftA_64(const MachineOperand & Root) const4769 AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
4770 auto MaybeImmed = getImmedFromMO(Root);
4771 if (MaybeImmed == None || *MaybeImmed > 63)
4772 return None;
4773 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
4774 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4775 }
4776
4777 InstructionSelector::ComplexRendererFns
selectShiftB_64(const MachineOperand & Root) const4778 AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
4779 auto MaybeImmed = getImmedFromMO(Root);
4780 if (MaybeImmed == None || *MaybeImmed > 63)
4781 return None;
4782 uint64_t Enc = 63 - *MaybeImmed;
4783 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4784 }
4785
4786 /// Helper to select an immediate value that can be represented as a 12-bit
4787 /// value shifted left by either 0 or 12. If it is possible to do so, return
4788 /// the immediate and shift value. If not, return None.
4789 ///
4790 /// Used by selectArithImmed and selectNegArithImmed.
4791 InstructionSelector::ComplexRendererFns
select12BitValueWithLeftShift(uint64_t Immed) const4792 AArch64InstructionSelector::select12BitValueWithLeftShift(
4793 uint64_t Immed) const {
4794 unsigned ShiftAmt;
4795 if (Immed >> 12 == 0) {
4796 ShiftAmt = 0;
4797 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
4798 ShiftAmt = 12;
4799 Immed = Immed >> 12;
4800 } else
4801 return None;
4802
4803 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
4804 return {{
4805 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
4806 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
4807 }};
4808 }
4809
4810 /// SelectArithImmed - Select an immediate value that can be represented as
4811 /// a 12-bit value shifted left by either 0 or 12. If so, return true with
4812 /// Val set to the 12-bit value and Shift set to the shifter operand.
4813 InstructionSelector::ComplexRendererFns
selectArithImmed(MachineOperand & Root) const4814 AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
4815 // This function is called from the addsub_shifted_imm ComplexPattern,
4816 // which lists [imm] as the list of opcode it's interested in, however
4817 // we still need to check whether the operand is actually an immediate
4818 // here because the ComplexPattern opcode list is only used in
4819 // root-level opcode matching.
4820 auto MaybeImmed = getImmedFromMO(Root);
4821 if (MaybeImmed == None)
4822 return None;
4823 return select12BitValueWithLeftShift(*MaybeImmed);
4824 }
4825
4826 /// SelectNegArithImmed - As above, but negates the value before trying to
4827 /// select it.
4828 InstructionSelector::ComplexRendererFns
selectNegArithImmed(MachineOperand & Root) const4829 AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
4830 // We need a register here, because we need to know if we have a 64 or 32
4831 // bit immediate.
4832 if (!Root.isReg())
4833 return None;
4834 auto MaybeImmed = getImmedFromMO(Root);
4835 if (MaybeImmed == None)
4836 return None;
4837 uint64_t Immed = *MaybeImmed;
4838
4839 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
4840 // have the opposite effect on the C flag, so this pattern mustn't match under
4841 // those circumstances.
4842 if (Immed == 0)
4843 return None;
4844
4845 // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
4846 // the root.
4847 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4848 if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
4849 Immed = ~((uint32_t)Immed) + 1;
4850 else
4851 Immed = ~Immed + 1ULL;
4852
4853 if (Immed & 0xFFFFFFFFFF000000ULL)
4854 return None;
4855
4856 Immed &= 0xFFFFFFULL;
4857 return select12BitValueWithLeftShift(Immed);
4858 }
4859
4860 /// Return true if it is worth folding MI into an extended register. That is,
4861 /// if it's safe to pull it into the addressing mode of a load or store as a
4862 /// shift.
isWorthFoldingIntoExtendedReg(MachineInstr & MI,const MachineRegisterInfo & MRI) const4863 bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
4864 MachineInstr &MI, const MachineRegisterInfo &MRI) const {
4865 // Always fold if there is one use, or if we're optimizing for size.
4866 Register DefReg = MI.getOperand(0).getReg();
4867 if (MRI.hasOneNonDBGUse(DefReg) ||
4868 MI.getParent()->getParent()->getFunction().hasMinSize())
4869 return true;
4870
4871 // It's better to avoid folding and recomputing shifts when we don't have a
4872 // fastpath.
4873 if (!STI.hasLSLFast())
4874 return false;
4875
4876 // We have a fastpath, so folding a shift in and potentially computing it
4877 // many times may be beneficial. Check if this is only used in memory ops.
4878 // If it is, then we should fold.
4879 return all_of(MRI.use_nodbg_instructions(DefReg),
4880 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
4881 }
4882
isSignExtendShiftType(AArch64_AM::ShiftExtendType Type)4883 static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) {
4884 switch (Type) {
4885 case AArch64_AM::SXTB:
4886 case AArch64_AM::SXTH:
4887 case AArch64_AM::SXTW:
4888 return true;
4889 default:
4890 return false;
4891 }
4892 }
4893
4894 InstructionSelector::ComplexRendererFns
selectExtendedSHL(MachineOperand & Root,MachineOperand & Base,MachineOperand & Offset,unsigned SizeInBytes,bool WantsExt) const4895 AArch64InstructionSelector::selectExtendedSHL(
4896 MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
4897 unsigned SizeInBytes, bool WantsExt) const {
4898 assert(Base.isReg() && "Expected base to be a register operand");
4899 assert(Offset.isReg() && "Expected offset to be a register operand");
4900
4901 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4902 MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
4903 if (!OffsetInst)
4904 return None;
4905
4906 unsigned OffsetOpc = OffsetInst->getOpcode();
4907 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
4908 return None;
4909
4910 // Make sure that the memory op is a valid size.
4911 int64_t LegalShiftVal = Log2_32(SizeInBytes);
4912 if (LegalShiftVal == 0)
4913 return None;
4914 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
4915 return None;
4916
4917 // Now, try to find the specific G_CONSTANT. Start by assuming that the
4918 // register we will offset is the LHS, and the register containing the
4919 // constant is the RHS.
4920 Register OffsetReg = OffsetInst->getOperand(1).getReg();
4921 Register ConstantReg = OffsetInst->getOperand(2).getReg();
4922 auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
4923 if (!ValAndVReg) {
4924 // We didn't get a constant on the RHS. If the opcode is a shift, then
4925 // we're done.
4926 if (OffsetOpc == TargetOpcode::G_SHL)
4927 return None;
4928
4929 // If we have a G_MUL, we can use either register. Try looking at the RHS.
4930 std::swap(OffsetReg, ConstantReg);
4931 ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
4932 if (!ValAndVReg)
4933 return None;
4934 }
4935
4936 // The value must fit into 3 bits, and must be positive. Make sure that is
4937 // true.
4938 int64_t ImmVal = ValAndVReg->Value;
4939
4940 // Since we're going to pull this into a shift, the constant value must be
4941 // a power of 2. If we got a multiply, then we need to check this.
4942 if (OffsetOpc == TargetOpcode::G_MUL) {
4943 if (!isPowerOf2_32(ImmVal))
4944 return None;
4945
4946 // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
4947 ImmVal = Log2_32(ImmVal);
4948 }
4949
4950 if ((ImmVal & 0x7) != ImmVal)
4951 return None;
4952
4953 // We are only allowed to shift by LegalShiftVal. This shift value is built
4954 // into the instruction, so we can't just use whatever we want.
4955 if (ImmVal != LegalShiftVal)
4956 return None;
4957
4958 unsigned SignExtend = 0;
4959 if (WantsExt) {
4960 // Check if the offset is defined by an extend.
4961 MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
4962 auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
4963 if (Ext == AArch64_AM::InvalidShiftExtend)
4964 return None;
4965
4966 SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
4967 // We only support SXTW for signed extension here.
4968 if (SignExtend && Ext != AArch64_AM::SXTW)
4969 return None;
4970
4971 // Need a 32-bit wide register here.
4972 MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
4973 OffsetReg = ExtInst->getOperand(1).getReg();
4974 OffsetReg = narrowExtendRegIfNeeded(OffsetReg, MIB);
4975 }
4976
4977 // We can use the LHS of the GEP as the base, and the LHS of the shift as an
4978 // offset. Signify that we are shifting by setting the shift flag to 1.
4979 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
4980 [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
4981 [=](MachineInstrBuilder &MIB) {
4982 // Need to add both immediates here to make sure that they are both
4983 // added to the instruction.
4984 MIB.addImm(SignExtend);
4985 MIB.addImm(1);
4986 }}};
4987 }
4988
4989 /// This is used for computing addresses like this:
4990 ///
4991 /// ldr x1, [x2, x3, lsl #3]
4992 ///
4993 /// Where x2 is the base register, and x3 is an offset register. The shift-left
4994 /// is a constant value specific to this load instruction. That is, we'll never
4995 /// see anything other than a 3 here (which corresponds to the size of the
4996 /// element being loaded.)
4997 InstructionSelector::ComplexRendererFns
selectAddrModeShiftedExtendXReg(MachineOperand & Root,unsigned SizeInBytes) const4998 AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
4999 MachineOperand &Root, unsigned SizeInBytes) const {
5000 if (!Root.isReg())
5001 return None;
5002 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5003
5004 // We want to find something like this:
5005 //
5006 // val = G_CONSTANT LegalShiftVal
5007 // shift = G_SHL off_reg val
5008 // ptr = G_PTR_ADD base_reg shift
5009 // x = G_LOAD ptr
5010 //
5011 // And fold it into this addressing mode:
5012 //
5013 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
5014
5015 // Check if we can find the G_PTR_ADD.
5016 MachineInstr *PtrAdd =
5017 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
5018 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
5019 return None;
5020
5021 // Now, try to match an opcode which will match our specific offset.
5022 // We want a G_SHL or a G_MUL.
5023 MachineInstr *OffsetInst =
5024 getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
5025 return selectExtendedSHL(Root, PtrAdd->getOperand(1),
5026 OffsetInst->getOperand(0), SizeInBytes,
5027 /*WantsExt=*/false);
5028 }
5029
5030 /// This is used for computing addresses like this:
5031 ///
5032 /// ldr x1, [x2, x3]
5033 ///
5034 /// Where x2 is the base register, and x3 is an offset register.
5035 ///
5036 /// When possible (or profitable) to fold a G_PTR_ADD into the address calculation,
5037 /// this will do so. Otherwise, it will return None.
5038 InstructionSelector::ComplexRendererFns
selectAddrModeRegisterOffset(MachineOperand & Root) const5039 AArch64InstructionSelector::selectAddrModeRegisterOffset(
5040 MachineOperand &Root) const {
5041 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5042
5043 // We need a GEP.
5044 MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
5045 if (!Gep || Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
5046 return None;
5047
5048 // If this is used more than once, let's not bother folding.
5049 // TODO: Check if they are memory ops. If they are, then we can still fold
5050 // without having to recompute anything.
5051 if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
5052 return None;
5053
5054 // Base is the GEP's LHS, offset is its RHS.
5055 return {{[=](MachineInstrBuilder &MIB) {
5056 MIB.addUse(Gep->getOperand(1).getReg());
5057 },
5058 [=](MachineInstrBuilder &MIB) {
5059 MIB.addUse(Gep->getOperand(2).getReg());
5060 },
5061 [=](MachineInstrBuilder &MIB) {
5062 // Need to add both immediates here to make sure that they are both
5063 // added to the instruction.
5064 MIB.addImm(0);
5065 MIB.addImm(0);
5066 }}};
5067 }
5068
5069 /// This is intended to be equivalent to selectAddrModeXRO in
5070 /// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
5071 InstructionSelector::ComplexRendererFns
selectAddrModeXRO(MachineOperand & Root,unsigned SizeInBytes) const5072 AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
5073 unsigned SizeInBytes) const {
5074 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5075
5076 // If we have a constant offset, then we probably don't want to match a
5077 // register offset.
5078 if (isBaseWithConstantOffset(Root, MRI))
5079 return None;
5080
5081 // Try to fold shifts into the addressing mode.
5082 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
5083 if (AddrModeFns)
5084 return AddrModeFns;
5085
5086 // If that doesn't work, see if it's possible to fold in registers from
5087 // a GEP.
5088 return selectAddrModeRegisterOffset(Root);
5089 }
5090
5091 /// This is used for computing addresses like this:
5092 ///
5093 /// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
5094 ///
5095 /// Where we have a 64-bit base register, a 32-bit offset register, and an
5096 /// extend (which may or may not be signed).
5097 InstructionSelector::ComplexRendererFns
selectAddrModeWRO(MachineOperand & Root,unsigned SizeInBytes) const5098 AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
5099 unsigned SizeInBytes) const {
5100 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5101
5102 MachineInstr *PtrAdd =
5103 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
5104 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
5105 return None;
5106
5107 MachineOperand &LHS = PtrAdd->getOperand(1);
5108 MachineOperand &RHS = PtrAdd->getOperand(2);
5109 MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
5110
5111 // The first case is the same as selectAddrModeXRO, except we need an extend.
5112 // In this case, we try to find a shift and extend, and fold them into the
5113 // addressing mode.
5114 //
5115 // E.g.
5116 //
5117 // off_reg = G_Z/S/ANYEXT ext_reg
5118 // val = G_CONSTANT LegalShiftVal
5119 // shift = G_SHL off_reg val
5120 // ptr = G_PTR_ADD base_reg shift
5121 // x = G_LOAD ptr
5122 //
5123 // In this case we can get a load like this:
5124 //
5125 // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
5126 auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
5127 SizeInBytes, /*WantsExt=*/true);
5128 if (ExtendedShl)
5129 return ExtendedShl;
5130
5131 // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
5132 //
5133 // e.g.
5134 // ldr something, [base_reg, ext_reg, sxtw]
5135 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
5136 return None;
5137
5138 // Check if this is an extend. We'll get an extend type if it is.
5139 AArch64_AM::ShiftExtendType Ext =
5140 getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
5141 if (Ext == AArch64_AM::InvalidShiftExtend)
5142 return None;
5143
5144 // Need a 32-bit wide register.
5145 MachineIRBuilder MIB(*PtrAdd);
5146 Register ExtReg =
5147 narrowExtendRegIfNeeded(OffsetInst->getOperand(1).getReg(), MIB);
5148 unsigned SignExtend = Ext == AArch64_AM::SXTW;
5149
5150 // Base is LHS, offset is ExtReg.
5151 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
5152 [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
5153 [=](MachineInstrBuilder &MIB) {
5154 MIB.addImm(SignExtend);
5155 MIB.addImm(0);
5156 }}};
5157 }
5158
5159 /// Select a "register plus unscaled signed 9-bit immediate" address. This
5160 /// should only match when there is an offset that is not valid for a scaled
5161 /// immediate addressing mode. The "Size" argument is the size in bytes of the
5162 /// memory reference, which is needed here to know what is valid for a scaled
5163 /// immediate.
5164 InstructionSelector::ComplexRendererFns
selectAddrModeUnscaled(MachineOperand & Root,unsigned Size) const5165 AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
5166 unsigned Size) const {
5167 MachineRegisterInfo &MRI =
5168 Root.getParent()->getParent()->getParent()->getRegInfo();
5169
5170 if (!Root.isReg())
5171 return None;
5172
5173 if (!isBaseWithConstantOffset(Root, MRI))
5174 return None;
5175
5176 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
5177 if (!RootDef)
5178 return None;
5179
5180 MachineOperand &OffImm = RootDef->getOperand(2);
5181 if (!OffImm.isReg())
5182 return None;
5183 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
5184 if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
5185 return None;
5186 int64_t RHSC;
5187 MachineOperand &RHSOp1 = RHS->getOperand(1);
5188 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
5189 return None;
5190 RHSC = RHSOp1.getCImm()->getSExtValue();
5191
5192 // If the offset is valid as a scaled immediate, don't match here.
5193 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
5194 return None;
5195 if (RHSC >= -256 && RHSC < 256) {
5196 MachineOperand &Base = RootDef->getOperand(1);
5197 return {{
5198 [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
5199 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
5200 }};
5201 }
5202 return None;
5203 }
5204
5205 InstructionSelector::ComplexRendererFns
tryFoldAddLowIntoImm(MachineInstr & RootDef,unsigned Size,MachineRegisterInfo & MRI) const5206 AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
5207 unsigned Size,
5208 MachineRegisterInfo &MRI) const {
5209 if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
5210 return None;
5211 MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
5212 if (Adrp.getOpcode() != AArch64::ADRP)
5213 return None;
5214
5215 // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
5216 // TODO: Need to check GV's offset % size if doing offset folding into globals.
5217 assert(Adrp.getOperand(1).getOffset() == 0 && "Unexpected offset in global");
5218 auto GV = Adrp.getOperand(1).getGlobal();
5219 if (GV->isThreadLocal())
5220 return None;
5221
5222 auto &MF = *RootDef.getParent()->getParent();
5223 if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
5224 return None;
5225
5226 unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
5227 MachineIRBuilder MIRBuilder(RootDef);
5228 Register AdrpReg = Adrp.getOperand(0).getReg();
5229 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
5230 [=](MachineInstrBuilder &MIB) {
5231 MIB.addGlobalAddress(GV, /* Offset */ 0,
5232 OpFlags | AArch64II::MO_PAGEOFF |
5233 AArch64II::MO_NC);
5234 }}};
5235 }
5236
5237 /// Select a "register plus scaled unsigned 12-bit immediate" address. The
5238 /// "Size" argument is the size in bytes of the memory reference, which
5239 /// determines the scale.
5240 InstructionSelector::ComplexRendererFns
selectAddrModeIndexed(MachineOperand & Root,unsigned Size) const5241 AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
5242 unsigned Size) const {
5243 MachineFunction &MF = *Root.getParent()->getParent()->getParent();
5244 MachineRegisterInfo &MRI = MF.getRegInfo();
5245
5246 if (!Root.isReg())
5247 return None;
5248
5249 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
5250 if (!RootDef)
5251 return None;
5252
5253 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
5254 return {{
5255 [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
5256 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
5257 }};
5258 }
5259
5260 CodeModel::Model CM = MF.getTarget().getCodeModel();
5261 // Check if we can fold in the ADD of small code model ADRP + ADD address.
5262 if (CM == CodeModel::Small) {
5263 auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
5264 if (OpFns)
5265 return OpFns;
5266 }
5267
5268 if (isBaseWithConstantOffset(Root, MRI)) {
5269 MachineOperand &LHS = RootDef->getOperand(1);
5270 MachineOperand &RHS = RootDef->getOperand(2);
5271 MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
5272 MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
5273 if (LHSDef && RHSDef) {
5274 int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
5275 unsigned Scale = Log2_32(Size);
5276 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
5277 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
5278 return {{
5279 [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
5280 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
5281 }};
5282
5283 return {{
5284 [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
5285 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
5286 }};
5287 }
5288 }
5289 }
5290
5291 // Before falling back to our general case, check if the unscaled
5292 // instructions can handle this. If so, that's preferable.
5293 if (selectAddrModeUnscaled(Root, Size).hasValue())
5294 return None;
5295
5296 return {{
5297 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
5298 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
5299 }};
5300 }
5301
5302 /// Given a shift instruction, return the correct shift type for that
5303 /// instruction.
getShiftTypeForInst(MachineInstr & MI)5304 static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
5305 // TODO: Handle AArch64_AM::ROR
5306 switch (MI.getOpcode()) {
5307 default:
5308 return AArch64_AM::InvalidShiftExtend;
5309 case TargetOpcode::G_SHL:
5310 return AArch64_AM::LSL;
5311 case TargetOpcode::G_LSHR:
5312 return AArch64_AM::LSR;
5313 case TargetOpcode::G_ASHR:
5314 return AArch64_AM::ASR;
5315 }
5316 }
5317
5318 /// Select a "shifted register" operand. If the value is not shifted, set the
5319 /// shift operand to a default value of "lsl 0".
5320 ///
5321 /// TODO: Allow shifted register to be rotated in logical instructions.
5322 InstructionSelector::ComplexRendererFns
selectShiftedRegister(MachineOperand & Root) const5323 AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const {
5324 if (!Root.isReg())
5325 return None;
5326 MachineRegisterInfo &MRI =
5327 Root.getParent()->getParent()->getParent()->getRegInfo();
5328
5329 // Check if the operand is defined by an instruction which corresponds to
5330 // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
5331 //
5332 // TODO: Handle AArch64_AM::ROR for logical instructions.
5333 MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
5334 if (!ShiftInst)
5335 return None;
5336 AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
5337 if (ShType == AArch64_AM::InvalidShiftExtend)
5338 return None;
5339 if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
5340 return None;
5341
5342 // Need an immediate on the RHS.
5343 MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
5344 auto Immed = getImmedFromMO(ShiftRHS);
5345 if (!Immed)
5346 return None;
5347
5348 // We have something that we can fold. Fold in the shift's LHS and RHS into
5349 // the instruction.
5350 MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
5351 Register ShiftReg = ShiftLHS.getReg();
5352
5353 unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
5354 unsigned Val = *Immed & (NumBits - 1);
5355 unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);
5356
5357 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
5358 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
5359 }
5360
getExtendTypeForInst(MachineInstr & MI,MachineRegisterInfo & MRI,bool IsLoadStore) const5361 AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
5362 MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
5363 unsigned Opc = MI.getOpcode();
5364
5365 // Handle explicit extend instructions first.
5366 if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
5367 unsigned Size;
5368 if (Opc == TargetOpcode::G_SEXT)
5369 Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
5370 else
5371 Size = MI.getOperand(2).getImm();
5372 assert(Size != 64 && "Extend from 64 bits?");
5373 switch (Size) {
5374 case 8:
5375 return AArch64_AM::SXTB;
5376 case 16:
5377 return AArch64_AM::SXTH;
5378 case 32:
5379 return AArch64_AM::SXTW;
5380 default:
5381 return AArch64_AM::InvalidShiftExtend;
5382 }
5383 }
5384
5385 if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
5386 unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
5387 assert(Size != 64 && "Extend from 64 bits?");
5388 switch (Size) {
5389 case 8:
5390 return AArch64_AM::UXTB;
5391 case 16:
5392 return AArch64_AM::UXTH;
5393 case 32:
5394 return AArch64_AM::UXTW;
5395 default:
5396 return AArch64_AM::InvalidShiftExtend;
5397 }
5398 }
5399
5400 // Don't have an explicit extend. Try to handle a G_AND with a constant mask
5401 // on the RHS.
5402 if (Opc != TargetOpcode::G_AND)
5403 return AArch64_AM::InvalidShiftExtend;
5404
5405 Optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
5406 if (!MaybeAndMask)
5407 return AArch64_AM::InvalidShiftExtend;
5408 uint64_t AndMask = *MaybeAndMask;
5409 switch (AndMask) {
5410 default:
5411 return AArch64_AM::InvalidShiftExtend;
5412 case 0xFF:
5413 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
5414 case 0xFFFF:
5415 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
5416 case 0xFFFFFFFF:
5417 return AArch64_AM::UXTW;
5418 }
5419 }
5420
narrowExtendRegIfNeeded(Register ExtReg,MachineIRBuilder & MIB) const5421 Register AArch64InstructionSelector::narrowExtendRegIfNeeded(
5422 Register ExtReg, MachineIRBuilder &MIB) const {
5423 MachineRegisterInfo &MRI = *MIB.getMRI();
5424 if (MRI.getType(ExtReg).getSizeInBits() == 32)
5425 return ExtReg;
5426
5427 // Insert a copy to move ExtReg to GPR32.
5428 Register NarrowReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
5429 auto Copy = MIB.buildCopy({NarrowReg}, {ExtReg});
5430
5431 // Select the copy into a subregister copy.
5432 selectCopy(*Copy, TII, MRI, TRI, RBI);
5433 return Copy.getReg(0);
5434 }
5435
widenGPRBankRegIfNeeded(Register Reg,unsigned WideSize,MachineIRBuilder & MIB) const5436 Register AArch64InstructionSelector::widenGPRBankRegIfNeeded(
5437 Register Reg, unsigned WideSize, MachineIRBuilder &MIB) const {
5438 assert(WideSize >= 8 && "WideSize is smaller than all possible registers?");
5439 MachineRegisterInfo &MRI = *MIB.getMRI();
5440 unsigned NarrowSize = MRI.getType(Reg).getSizeInBits();
5441 assert(WideSize >= NarrowSize &&
5442 "WideSize cannot be smaller than NarrowSize!");
5443
5444 // If the sizes match, just return the register.
5445 //
5446 // If NarrowSize is an s1, then we can select it to any size, so we'll treat
5447 // it as a don't care.
5448 if (NarrowSize == WideSize || NarrowSize == 1)
5449 return Reg;
5450
5451 // Now check the register classes.
5452 const RegisterBank *RB = RBI.getRegBank(Reg, MRI, TRI);
5453 const TargetRegisterClass *OrigRC = getMinClassForRegBank(*RB, NarrowSize);
5454 const TargetRegisterClass *WideRC = getMinClassForRegBank(*RB, WideSize);
5455 assert(OrigRC && "Could not determine narrow RC?");
5456 assert(WideRC && "Could not determine wide RC?");
5457
5458 // If the sizes differ, but the register classes are the same, there is no
5459 // need to insert a SUBREG_TO_REG.
5460 //
5461 // For example, an s8 that's supposed to be a GPR will be selected to either
5462 // a GPR32 or a GPR64 register. Note that this assumes that the s8 will
5463 // always end up on a GPR32.
5464 if (OrigRC == WideRC)
5465 return Reg;
5466
5467 // We have two different register classes. Insert a SUBREG_TO_REG.
5468 unsigned SubReg = 0;
5469 getSubRegForClass(OrigRC, TRI, SubReg);
5470 assert(SubReg && "Couldn't determine subregister?");
5471
5472 // Build the SUBREG_TO_REG and return the new, widened register.
5473 auto SubRegToReg =
5474 MIB.buildInstr(AArch64::SUBREG_TO_REG, {WideRC}, {})
5475 .addImm(0)
5476 .addUse(Reg)
5477 .addImm(SubReg);
5478 constrainSelectedInstRegOperands(*SubRegToReg, TII, TRI, RBI);
5479 return SubRegToReg.getReg(0);
5480 }
5481
5482 /// Select an "extended register" operand. This operand folds in an extend
5483 /// followed by an optional left shift.
5484 InstructionSelector::ComplexRendererFns
selectArithExtendedRegister(MachineOperand & Root) const5485 AArch64InstructionSelector::selectArithExtendedRegister(
5486 MachineOperand &Root) const {
5487 if (!Root.isReg())
5488 return None;
5489 MachineRegisterInfo &MRI =
5490 Root.getParent()->getParent()->getParent()->getRegInfo();
5491
5492 uint64_t ShiftVal = 0;
5493 Register ExtReg;
5494 AArch64_AM::ShiftExtendType Ext;
5495 MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
5496 if (!RootDef)
5497 return None;
5498
5499 if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI))
5500 return None;
5501
5502 // Check if we can fold a shift and an extend.
5503 if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
5504 // Look for a constant on the RHS of the shift.
5505 MachineOperand &RHS = RootDef->getOperand(2);
5506 Optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
5507 if (!MaybeShiftVal)
5508 return None;
5509 ShiftVal = *MaybeShiftVal;
5510 if (ShiftVal > 4)
5511 return None;
5512 // Look for a valid extend instruction on the LHS of the shift.
5513 MachineOperand &LHS = RootDef->getOperand(1);
5514 MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
5515 if (!ExtDef)
5516 return None;
5517 Ext = getExtendTypeForInst(*ExtDef, MRI);
5518 if (Ext == AArch64_AM::InvalidShiftExtend)
5519 return None;
5520 ExtReg = ExtDef->getOperand(1).getReg();
5521 } else {
5522 // Didn't get a shift. Try just folding an extend.
5523 Ext = getExtendTypeForInst(*RootDef, MRI);
5524 if (Ext == AArch64_AM::InvalidShiftExtend)
5525 return None;
5526 ExtReg = RootDef->getOperand(1).getReg();
5527
5528 // If we have a 32 bit instruction which zeroes out the high half of a
5529 // register, we get an implicit zero extend for free. Check if we have one.
5530 // FIXME: We actually emit the extend right now even though we don't have
5531 // to.
5532 if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
5533 MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
5534 if (ExtInst && isDef32(*ExtInst))
5535 return None;
5536 }
5537 }
5538
5539 // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
5540 // copy.
5541 MachineIRBuilder MIB(*RootDef);
5542 ExtReg = narrowExtendRegIfNeeded(ExtReg, MIB);
5543
5544 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
5545 [=](MachineInstrBuilder &MIB) {
5546 MIB.addImm(getArithExtendImm(Ext, ShiftVal));
5547 }}};
5548 }
5549
renderTruncImm(MachineInstrBuilder & MIB,const MachineInstr & MI,int OpIdx) const5550 void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
5551 const MachineInstr &MI,
5552 int OpIdx) const {
5553 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
5554 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
5555 "Expected G_CONSTANT");
5556 Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
5557 assert(CstVal && "Expected constant value");
5558 MIB.addImm(CstVal.getValue());
5559 }
5560
renderLogicalImm32(MachineInstrBuilder & MIB,const MachineInstr & I,int OpIdx) const5561 void AArch64InstructionSelector::renderLogicalImm32(
5562 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
5563 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
5564 "Expected G_CONSTANT");
5565 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
5566 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
5567 MIB.addImm(Enc);
5568 }
5569
renderLogicalImm64(MachineInstrBuilder & MIB,const MachineInstr & I,int OpIdx) const5570 void AArch64InstructionSelector::renderLogicalImm64(
5571 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
5572 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
5573 "Expected G_CONSTANT");
5574 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
5575 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
5576 MIB.addImm(Enc);
5577 }
5578
isLoadStoreOfNumBytes(const MachineInstr & MI,unsigned NumBytes) const5579 bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
5580 const MachineInstr &MI, unsigned NumBytes) const {
5581 if (!MI.mayLoadOrStore())
5582 return false;
5583 assert(MI.hasOneMemOperand() &&
5584 "Expected load/store to have only one mem op!");
5585 return (*MI.memoperands_begin())->getSize() == NumBytes;
5586 }
5587
isDef32(const MachineInstr & MI) const5588 bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
5589 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
5590 if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
5591 return false;
5592
5593 // Only return true if we know the operation will zero-out the high half of
5594 // the 64-bit register. Truncates can be subregister copies, which don't
5595 // zero out the high bits. Copies and other copy-like instructions can be
5596 // fed by truncates, or could be lowered as subregister copies.
5597 switch (MI.getOpcode()) {
5598 default:
5599 return true;
5600 case TargetOpcode::COPY:
5601 case TargetOpcode::G_BITCAST:
5602 case TargetOpcode::G_TRUNC:
5603 case TargetOpcode::G_PHI:
5604 return false;
5605 }
5606 }
5607
5608
5609 // Perform fixups on the given PHI instruction's operands to force them all
5610 // to be the same as the destination regbank.
fixupPHIOpBanks(MachineInstr & MI,MachineRegisterInfo & MRI,const AArch64RegisterBankInfo & RBI)5611 static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
5612 const AArch64RegisterBankInfo &RBI) {
5613 assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
5614 Register DstReg = MI.getOperand(0).getReg();
5615 const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
5616 assert(DstRB && "Expected PHI dst to have regbank assigned");
5617 MachineIRBuilder MIB(MI);
5618
5619 // Go through each operand and ensure it has the same regbank.
5620 for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
5621 MachineOperand &MO = MI.getOperand(OpIdx);
5622 if (!MO.isReg())
5623 continue;
5624 Register OpReg = MO.getReg();
5625 const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
5626 if (RB != DstRB) {
5627 // Insert a cross-bank copy.
5628 auto *OpDef = MRI.getVRegDef(OpReg);
5629 const LLT &Ty = MRI.getType(OpReg);
5630 MIB.setInsertPt(*OpDef->getParent(), std::next(OpDef->getIterator()));
5631 auto Copy = MIB.buildCopy(Ty, OpReg);
5632 MRI.setRegBank(Copy.getReg(0), *DstRB);
5633 MO.setReg(Copy.getReg(0));
5634 }
5635 }
5636 }
5637
processPHIs(MachineFunction & MF)5638 void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
5639 // We're looking for PHIs, build a list so we don't invalidate iterators.
5640 MachineRegisterInfo &MRI = MF.getRegInfo();
5641 SmallVector<MachineInstr *, 32> Phis;
5642 for (auto &BB : MF) {
5643 for (auto &MI : BB) {
5644 if (MI.getOpcode() == TargetOpcode::G_PHI)
5645 Phis.emplace_back(&MI);
5646 }
5647 }
5648
5649 for (auto *MI : Phis) {
5650 // We need to do some work here if the operand types are < 16 bit and they
5651 // are split across fpr/gpr banks. Since all types <32b on gpr
5652 // end up being assigned gpr32 regclasses, we can end up with PHIs here
5653 // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
5654 // be selecting heterogenous regbanks for operands if possible, but we
5655 // still need to be able to deal with it here.
5656 //
5657 // To fix this, if we have a gpr-bank operand < 32b in size and at least
5658 // one other operand is on the fpr bank, then we add cross-bank copies
5659 // to homogenize the operand banks. For simplicity the bank that we choose
5660 // to settle on is whatever bank the def operand has. For example:
5661 //
5662 // %endbb:
5663 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
5664 // =>
5665 // %bb2:
5666 // ...
5667 // %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
5668 // ...
5669 // %endbb:
5670 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
5671 bool HasGPROp = false, HasFPROp = false;
5672 for (unsigned OpIdx = 1; OpIdx < MI->getNumOperands(); ++OpIdx) {
5673 const auto &MO = MI->getOperand(OpIdx);
5674 if (!MO.isReg())
5675 continue;
5676 const LLT &Ty = MRI.getType(MO.getReg());
5677 if (!Ty.isValid() || !Ty.isScalar())
5678 break;
5679 if (Ty.getSizeInBits() >= 32)
5680 break;
5681 const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
5682 // If for some reason we don't have a regbank yet. Don't try anything.
5683 if (!RB)
5684 break;
5685
5686 if (RB->getID() == AArch64::GPRRegBankID)
5687 HasGPROp = true;
5688 else
5689 HasFPROp = true;
5690 }
5691 // We have heterogenous regbanks, need to fixup.
5692 if (HasGPROp && HasFPROp)
5693 fixupPHIOpBanks(*MI, MRI, RBI);
5694 }
5695 }
5696
5697 namespace llvm {
5698 InstructionSelector *
createAArch64InstructionSelector(const AArch64TargetMachine & TM,AArch64Subtarget & Subtarget,AArch64RegisterBankInfo & RBI)5699 createAArch64InstructionSelector(const AArch64TargetMachine &TM,
5700 AArch64Subtarget &Subtarget,
5701 AArch64RegisterBankInfo &RBI) {
5702 return new AArch64InstructionSelector(TM, Subtarget, RBI);
5703 }
5704 }
5705