1 //===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file implements the targeting of the InstructionSelector class for
10 /// AArch64.
11 /// \todo This should be generated by TableGen.
12 //===----------------------------------------------------------------------===//
13 
14 #include "AArch64InstrInfo.h"
15 #include "AArch64MachineFunctionInfo.h"
16 #include "AArch64RegisterBankInfo.h"
17 #include "AArch64RegisterInfo.h"
18 #include "AArch64Subtarget.h"
19 #include "AArch64TargetMachine.h"
20 #include "MCTargetDesc/AArch64AddressingModes.h"
21 #include "llvm/ADT/Optional.h"
22 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
23 #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
24 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
25 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
26 #include "llvm/CodeGen/GlobalISel/Utils.h"
27 #include "llvm/CodeGen/MachineBasicBlock.h"
28 #include "llvm/CodeGen/MachineConstantPool.h"
29 #include "llvm/CodeGen/MachineFunction.h"
30 #include "llvm/CodeGen/MachineInstr.h"
31 #include "llvm/CodeGen/MachineInstrBuilder.h"
32 #include "llvm/CodeGen/MachineOperand.h"
33 #include "llvm/CodeGen/MachineRegisterInfo.h"
34 #include "llvm/CodeGen/TargetOpcodes.h"
35 #include "llvm/IR/Constants.h"
36 #include "llvm/IR/Type.h"
37 #include "llvm/IR/IntrinsicsAArch64.h"
38 #include "llvm/Support/Debug.h"
39 #include "llvm/Support/raw_ostream.h"
40 
41 #define DEBUG_TYPE "aarch64-isel"
42 
43 using namespace llvm;
44 
45 namespace {
46 
47 #define GET_GLOBALISEL_PREDICATE_BITSET
48 #include "AArch64GenGlobalISel.inc"
49 #undef GET_GLOBALISEL_PREDICATE_BITSET
50 
51 class AArch64InstructionSelector : public InstructionSelector {
52 public:
53   AArch64InstructionSelector(const AArch64TargetMachine &TM,
54                              const AArch64Subtarget &STI,
55                              const AArch64RegisterBankInfo &RBI);
56 
57   bool select(MachineInstr &I) override;
58   static const char *getName() { return DEBUG_TYPE; }
59 
60   void setupMF(MachineFunction &MF, GISelKnownBits &KB,
61                CodeGenCoverage &CoverageInfo) override {
62     InstructionSelector::setupMF(MF, KB, CoverageInfo);
63 
64     // hasFnAttribute() is expensive to call on every BRCOND selection, so
65     // cache it here for each run of the selector.
66     ProduceNonFlagSettingCondBr =
67         !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
68     MFReturnAddr = Register();
69 
70     processPHIs(MF);
71   }
72 
73 private:
74   /// tblgen-erated 'select' implementation, used as the initial selector for
75   /// the patterns that don't require complex C++.
76   bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
77 
78   // A lowering phase that runs before any selection attempts.
79   // Returns true if the instruction was modified.
80   bool preISelLower(MachineInstr &I);
81 
82   // An early selection function that runs before the selectImpl() call.
83   bool earlySelect(MachineInstr &I) const;
84 
85   // Do some preprocessing of G_PHIs before we begin selection.
86   void processPHIs(MachineFunction &MF);
87 
88   bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
89 
90   /// Eliminate same-sized cross-bank copies into stores before selectImpl().
91   bool contractCrossBankCopyIntoStore(MachineInstr &I,
92                                       MachineRegisterInfo &MRI);
93 
94   bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
95 
96   bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
97                           MachineRegisterInfo &MRI) const;
98   bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
99                            MachineRegisterInfo &MRI) const;
100 
101   bool tryOptAndIntoCompareBranch(MachineInstr *LHS,
102                                   int64_t CmpConstant,
103                                   const CmpInst::Predicate &Pred,
104                                   MachineBasicBlock *DstMBB,
105                                   MachineIRBuilder &MIB) const;
106   bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
107                            MachineRegisterInfo &MRI) const;
108 
109   bool selectVectorASHR(MachineInstr &I, MachineRegisterInfo &MRI) const;
110   bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
111 
112   // Helper to generate an equivalent of scalar_to_vector into a new register,
113   // returned via 'Dst'.
114   MachineInstr *emitScalarToVector(unsigned EltSize,
115                                    const TargetRegisterClass *DstRC,
116                                    Register Scalar,
117                                    MachineIRBuilder &MIRBuilder) const;
118 
119   /// Emit a lane insert into \p DstReg, or a new vector register if None is
120   /// provided.
121   ///
122   /// The lane inserted into is defined by \p LaneIdx. The vector source
123   /// register is given by \p SrcReg. The register containing the element is
124   /// given by \p EltReg.
125   MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
126                                Register EltReg, unsigned LaneIdx,
127                                const RegisterBank &RB,
128                                MachineIRBuilder &MIRBuilder) const;
129   bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
130   bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
131                               MachineRegisterInfo &MRI) const;
132   bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
133   bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
134   bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
135 
136   bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
137   bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
138   bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
139   bool selectSplitVectorUnmerge(MachineInstr &I,
140                                 MachineRegisterInfo &MRI) const;
141   bool selectIntrinsicWithSideEffects(MachineInstr &I,
142                                       MachineRegisterInfo &MRI) const;
143   bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
144   bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
145   bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
146   bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
147   bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
148   bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
149   bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const;
150 
151   unsigned emitConstantPoolEntry(const Constant *CPVal,
152                                  MachineFunction &MF) const;
153   MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
154                                          MachineIRBuilder &MIRBuilder) const;
155 
156   // Emit a vector concat operation.
157   MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
158                                  Register Op2,
159                                  MachineIRBuilder &MIRBuilder) const;
160 
161   // Emit an integer compare between LHS and RHS, which checks for Predicate.
162   //
163   // This returns the produced compare instruction, and the predicate which
164   // was ultimately used in the compare. The predicate may differ from what
165   // is passed in \p Predicate due to optimization.
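  // For example, a compare whose constant does not fit the arithmetic
  // immediate encoding may be rewritten (e.g. x s< c becoming x s<= c - 1) so
  // that the adjusted immediate does fit; see tryOptArithImmedIntegerCompare.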
166   std::pair<MachineInstr *, CmpInst::Predicate>
167   emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
168                      MachineOperand &Predicate,
169                      MachineIRBuilder &MIRBuilder) const;
170   MachineInstr *emitADD(Register DefReg, MachineOperand &LHS, MachineOperand &RHS,
171                         MachineIRBuilder &MIRBuilder) const;
172   MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
173                         MachineIRBuilder &MIRBuilder) const;
174   MachineInstr *emitTST(const Register &LHS, const Register &RHS,
175                         MachineIRBuilder &MIRBuilder) const;
176   MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
177                                      const RegisterBank &DstRB, LLT ScalarTy,
178                                      Register VecReg, unsigned LaneIdx,
179                                      MachineIRBuilder &MIRBuilder) const;
180 
181   /// Helper function for selecting G_FCONSTANT. If the G_FCONSTANT can be
182   /// materialized using a FMOV instruction, then update MI and return it.
183   /// Otherwise, do nothing and return a nullptr.
184   MachineInstr *emitFMovForFConstant(MachineInstr &MI,
185                                      MachineRegisterInfo &MRI) const;
186 
187   /// Emit a CSet for a compare.
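  /// On AArch64, CSET Wd, cc is an alias of CSINC Wd, WZR, WZR, invert(cc),
  /// so the emitted instruction materializes 1 when the condition holds and
  /// 0 otherwise.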
188   MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
189                                 MachineIRBuilder &MIRBuilder) const;
190 
191   /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
192   /// \p IsNegative is true if the test should be "not zero".
193   /// This will also optimize the test bit instruction when possible.
194   MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
195                             MachineBasicBlock *DstMBB,
196                             MachineIRBuilder &MIB) const;
197 
198   // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
199   // We use these manually instead of using the importer since it doesn't
200   // support SDNodeXForm.
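  // For reference, the .td helpers encode a left shift by 'imm' as a UBFM:
  // i32shift_a is (32 - imm) & 0x1f and i32shift_b is 31 - imm (the immr and
  // imms operands); the 64-bit variants use (64 - imm) & 0x3f and 63 - imm.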
201   ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
202   ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
203   ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
204   ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
205 
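  // An AArch64 arithmetic immediate is an unsigned 12-bit value, optionally
  // left-shifted by 12; e.g. 0x123000 is encoded as imm = 0x123, shift = 12.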
206   ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
207   ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
208   ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
209 
210   ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
211                                             unsigned Size) const;
212 
213   ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
214     return selectAddrModeUnscaled(Root, 1);
215   }
216   ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
217     return selectAddrModeUnscaled(Root, 2);
218   }
219   ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
220     return selectAddrModeUnscaled(Root, 4);
221   }
222   ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
223     return selectAddrModeUnscaled(Root, 8);
224   }
225   ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
226     return selectAddrModeUnscaled(Root, 16);
227   }
228 
229   /// Helper to try to fold a G_ADD_LOW into an immediate, to be used
230   /// from complex pattern matchers like selectAddrModeIndexed().
231   ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
232                                           MachineRegisterInfo &MRI) const;
233 
234   ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
235                                            unsigned Size) const;
236   template <int Width>
237   ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
238     return selectAddrModeIndexed(Root, Width / 8);
239   }
240 
241   bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
242                                      const MachineRegisterInfo &MRI) const;
243   ComplexRendererFns
244   selectAddrModeShiftedExtendXReg(MachineOperand &Root,
245                                   unsigned SizeInBytes) const;
246 
247   /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
248   /// or not a shift + extend should be folded into an addressing mode. Returns
249   /// None when this is not profitable or possible.
250   ComplexRendererFns
251   selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
252                     MachineOperand &Offset, unsigned SizeInBytes,
253                     bool WantsExt) const;
254   ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
255   ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
256                                        unsigned SizeInBytes) const;
257   template <int Width>
258   ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
259     return selectAddrModeXRO(Root, Width / 8);
260   }
261 
262   ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
263                                        unsigned SizeInBytes) const;
264   template <int Width>
265   ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
266     return selectAddrModeWRO(Root, Width / 8);
267   }
268 
269   ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const;
270 
271   ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
272     return selectShiftedRegister(Root);
273   }
274 
275   ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
276     // TODO: selectShiftedRegister should allow for rotates on logical shifts.
277     // For now, make them the same. The only difference between the two is that
278     // logical shifts are allowed to fold in rotates. Otherwise, these are
279     // functionally the same.
280     return selectShiftedRegister(Root);
281   }
282 
283   /// Given an extend instruction, determine the correct shift-extend type for
284   /// that instruction.
285   ///
286   /// If the instruction is going to be used in a load or store, pass
287   /// \p IsLoadStore = true.
288   AArch64_AM::ShiftExtendType
289   getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
290                        bool IsLoadStore = false) const;
291 
292   /// Instructions that accept extend modifiers like UXTW expect the register
293   /// being extended to be a GPR32. Narrow ExtReg to a 32-bit register using a
294   /// subregister copy if necessary. Return either ExtReg, or the result of the
295   /// new copy.
296   Register narrowExtendRegIfNeeded(Register ExtReg,
297                                              MachineIRBuilder &MIB) const;
298   Register widenGPRBankRegIfNeeded(Register Reg, unsigned Size,
299                                    MachineIRBuilder &MIB) const;
300   ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
301 
302   void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
303                       int OpIdx = -1) const;
304   void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
305                           int OpIdx = -1) const;
306   void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
307                           int OpIdx = -1) const;
308 
309   // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
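  // Roughly: a MOVZ of bits [15:0] (:abs_g0_nc:) followed by MOVKs of bits
  // [31:16], [47:32] and [63:48] (:abs_g1_nc:, :abs_g2_nc:, :abs_g3:).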
310   void materializeLargeCMVal(MachineInstr &I, const Value *V,
311                              unsigned OpFlags) const;
312 
313   // Optimization methods.
314   bool tryOptSelect(MachineInstr &MI) const;
315   MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
316                                       MachineOperand &Predicate,
317                                       MachineIRBuilder &MIRBuilder) const;
318   MachineInstr *tryOptArithImmedIntegerCompare(MachineOperand &LHS,
319                                                MachineOperand &RHS,
320                                                CmpInst::Predicate &Predicate,
321                                                MachineIRBuilder &MIB) const;
322   MachineInstr *tryOptArithShiftedCompare(MachineOperand &LHS,
323                                           MachineOperand &RHS,
324                                           MachineIRBuilder &MIB) const;
325 
326   /// Return true if \p MI is a load or store of \p NumBytes bytes.
327   bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
328 
329   /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
330   /// register zeroed out. In other words, the result of MI has been explicitly
331   /// zero extended.
332   bool isDef32(const MachineInstr &MI) const;
333 
334   const AArch64TargetMachine &TM;
335   const AArch64Subtarget &STI;
336   const AArch64InstrInfo &TII;
337   const AArch64RegisterInfo &TRI;
338   const AArch64RegisterBankInfo &RBI;
339 
340   bool ProduceNonFlagSettingCondBr = false;
341 
342   // Some cached values used during selection.
343   // We use LR as a live-in register, and we keep track of it here as it can be
344   // clobbered by calls.
345   Register MFReturnAddr;
346 
347 #define GET_GLOBALISEL_PREDICATES_DECL
348 #include "AArch64GenGlobalISel.inc"
349 #undef GET_GLOBALISEL_PREDICATES_DECL
350 
351 // We declare the temporaries used by selectImpl() in the class to minimize the
352 // cost of constructing placeholder values.
353 #define GET_GLOBALISEL_TEMPORARIES_DECL
354 #include "AArch64GenGlobalISel.inc"
355 #undef GET_GLOBALISEL_TEMPORARIES_DECL
356 };
357 
358 } // end anonymous namespace
359 
360 #define GET_GLOBALISEL_IMPL
361 #include "AArch64GenGlobalISel.inc"
362 #undef GET_GLOBALISEL_IMPL
363 
364 AArch64InstructionSelector::AArch64InstructionSelector(
365     const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
366     const AArch64RegisterBankInfo &RBI)
367     : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
368       TRI(*STI.getRegisterInfo()), RBI(RBI),
369 #define GET_GLOBALISEL_PREDICATES_INIT
370 #include "AArch64GenGlobalISel.inc"
371 #undef GET_GLOBALISEL_PREDICATES_INIT
372 #define GET_GLOBALISEL_TEMPORARIES_INIT
373 #include "AArch64GenGlobalISel.inc"
374 #undef GET_GLOBALISEL_TEMPORARIES_INIT
375 {
376 }
377 
378 // FIXME: This should be target-independent, inferred from the types declared
379 // for each class in the bank.
380 static const TargetRegisterClass *
381 getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
382                          const RegisterBankInfo &RBI,
383                          bool GetAllRegSet = false) {
384   if (RB.getID() == AArch64::GPRRegBankID) {
385     if (Ty.getSizeInBits() <= 32)
386       return GetAllRegSet ? &AArch64::GPR32allRegClass
387                           : &AArch64::GPR32RegClass;
388     if (Ty.getSizeInBits() == 64)
389       return GetAllRegSet ? &AArch64::GPR64allRegClass
390                           : &AArch64::GPR64RegClass;
391     return nullptr;
392   }
393 
394   if (RB.getID() == AArch64::FPRRegBankID) {
395     if (Ty.getSizeInBits() <= 16)
396       return &AArch64::FPR16RegClass;
397     if (Ty.getSizeInBits() == 32)
398       return &AArch64::FPR32RegClass;
399     if (Ty.getSizeInBits() == 64)
400       return &AArch64::FPR64RegClass;
401     if (Ty.getSizeInBits() == 128)
402       return &AArch64::FPR128RegClass;
403     return nullptr;
404   }
405 
406   return nullptr;
407 }
408 
409 /// Given a register bank, and size in bits, return the smallest register class
410 /// that can represent that combination.
411 static const TargetRegisterClass *
412 getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
413                       bool GetAllRegSet = false) {
414   unsigned RegBankID = RB.getID();
415 
416   if (RegBankID == AArch64::GPRRegBankID) {
417     if (SizeInBits <= 32)
418       return GetAllRegSet ? &AArch64::GPR32allRegClass
419                           : &AArch64::GPR32RegClass;
420     if (SizeInBits == 64)
421       return GetAllRegSet ? &AArch64::GPR64allRegClass
422                           : &AArch64::GPR64RegClass;
423   }
424 
425   if (RegBankID == AArch64::FPRRegBankID) {
426     switch (SizeInBits) {
427     default:
428       return nullptr;
429     case 8:
430       return &AArch64::FPR8RegClass;
431     case 16:
432       return &AArch64::FPR16RegClass;
433     case 32:
434       return &AArch64::FPR32RegClass;
435     case 64:
436       return &AArch64::FPR64RegClass;
437     case 128:
438       return &AArch64::FPR128RegClass;
439     }
440   }
441 
442   return nullptr;
443 }
444 
445 /// Returns the correct subregister to use for a given register class.
446 static bool getSubRegForClass(const TargetRegisterClass *RC,
447                               const TargetRegisterInfo &TRI, unsigned &SubReg) {
448   switch (TRI.getRegSizeInBits(*RC)) {
449   case 8:
450     SubReg = AArch64::bsub;
451     break;
452   case 16:
453     SubReg = AArch64::hsub;
454     break;
455   case 32:
456     if (RC != &AArch64::FPR32RegClass)
457       SubReg = AArch64::sub_32;
458     else
459       SubReg = AArch64::ssub;
460     break;
461   case 64:
462     SubReg = AArch64::dsub;
463     break;
464   default:
465     LLVM_DEBUG(
466         dbgs() << "Couldn't find appropriate subregister for register class.");
467     return false;
468   }
469 
470   return true;
471 }
472 
473 /// Returns the minimum size the given register bank can hold.
474 static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
475   switch (RB.getID()) {
476   case AArch64::GPRRegBankID:
477     return 32;
478   case AArch64::FPRRegBankID:
479     return 8;
480   default:
481     llvm_unreachable("Tried to get minimum size for unknown register bank.");
482   }
483 }
484 
485 static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
486   auto &MI = *Root.getParent();
487   auto &MBB = *MI.getParent();
488   auto &MF = *MBB.getParent();
489   auto &MRI = MF.getRegInfo();
490   uint64_t Immed;
491   if (Root.isImm())
492     Immed = Root.getImm();
493   else if (Root.isCImm())
494     Immed = Root.getCImm()->getZExtValue();
495   else if (Root.isReg()) {
496     auto ValAndVReg =
497         getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
498     if (!ValAndVReg)
499       return None;
500     Immed = ValAndVReg->Value;
501   } else
502     return None;
503   return Immed;
504 }
505 
506 /// Check whether \p I is a currently unsupported binary operation:
507 /// - it has an unsized type
508 /// - an operand is not a vreg
509 /// - not all operands are in the same bank
510 /// These are checks that should someday live in the verifier, but right now,
511 /// these are mostly limitations of the aarch64 selector.
512 static bool unsupportedBinOp(const MachineInstr &I,
513                              const AArch64RegisterBankInfo &RBI,
514                              const MachineRegisterInfo &MRI,
515                              const AArch64RegisterInfo &TRI) {
516   LLT Ty = MRI.getType(I.getOperand(0).getReg());
517   if (!Ty.isValid()) {
518     LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
519     return true;
520   }
521 
522   const RegisterBank *PrevOpBank = nullptr;
523   for (auto &MO : I.operands()) {
524     // FIXME: Support non-register operands.
525     if (!MO.isReg()) {
526       LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
527       return true;
528     }
529 
530     // FIXME: Can generic operations have physical registers operands? If
531     // so, this will need to be taught about that, and we'll need to get the
532     // bank out of the minimal class for the register.
533     // Either way, this needs to be documented (and possibly verified).
534     if (!Register::isVirtualRegister(MO.getReg())) {
535       LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
536       return true;
537     }
538 
539     const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
540     if (!OpBank) {
541       LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
542       return true;
543     }
544 
545     if (PrevOpBank && OpBank != PrevOpBank) {
546       LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
547       return true;
548     }
549     PrevOpBank = OpBank;
550   }
551   return false;
552 }
553 
554 /// Select the AArch64 opcode for the basic binary operation \p GenericOpc
555 /// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
556 /// and of size \p OpSize.
557 /// \returns \p GenericOpc if the combination is unsupported.
558 static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
559                                unsigned OpSize) {
560   switch (RegBankID) {
561   case AArch64::GPRRegBankID:
562     if (OpSize == 32) {
563       switch (GenericOpc) {
564       case TargetOpcode::G_SHL:
565         return AArch64::LSLVWr;
566       case TargetOpcode::G_LSHR:
567         return AArch64::LSRVWr;
568       case TargetOpcode::G_ASHR:
569         return AArch64::ASRVWr;
570       default:
571         return GenericOpc;
572       }
573     } else if (OpSize == 64) {
574       switch (GenericOpc) {
575       case TargetOpcode::G_PTR_ADD:
576         return AArch64::ADDXrr;
577       case TargetOpcode::G_SHL:
578         return AArch64::LSLVXr;
579       case TargetOpcode::G_LSHR:
580         return AArch64::LSRVXr;
581       case TargetOpcode::G_ASHR:
582         return AArch64::ASRVXr;
583       default:
584         return GenericOpc;
585       }
586     }
587     break;
588   case AArch64::FPRRegBankID:
589     switch (OpSize) {
590     case 32:
591       switch (GenericOpc) {
592       case TargetOpcode::G_FADD:
593         return AArch64::FADDSrr;
594       case TargetOpcode::G_FSUB:
595         return AArch64::FSUBSrr;
596       case TargetOpcode::G_FMUL:
597         return AArch64::FMULSrr;
598       case TargetOpcode::G_FDIV:
599         return AArch64::FDIVSrr;
600       default:
601         return GenericOpc;
602       }
603     case 64:
604       switch (GenericOpc) {
605       case TargetOpcode::G_FADD:
606         return AArch64::FADDDrr;
607       case TargetOpcode::G_FSUB:
608         return AArch64::FSUBDrr;
609       case TargetOpcode::G_FMUL:
610         return AArch64::FMULDrr;
611       case TargetOpcode::G_FDIV:
612         return AArch64::FDIVDrr;
613       case TargetOpcode::G_OR:
614         return AArch64::ORRv8i8;
615       default:
616         return GenericOpc;
617       }
618     }
619     break;
620   }
621   return GenericOpc;
622 }
623 
624 /// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
625 /// appropriate for the (value) register bank \p RegBankID and of memory access
626 /// size \p OpSize.  This returns the variant with the base+unsigned-immediate
627 /// addressing mode (e.g., LDRXui).
628 /// \returns \p GenericOpc if the combination is unsupported.
629 static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
630                                     unsigned OpSize) {
631   const bool isStore = GenericOpc == TargetOpcode::G_STORE;
632   switch (RegBankID) {
633   case AArch64::GPRRegBankID:
634     switch (OpSize) {
635     case 8:
636       return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
637     case 16:
638       return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
639     case 32:
640       return isStore ? AArch64::STRWui : AArch64::LDRWui;
641     case 64:
642       return isStore ? AArch64::STRXui : AArch64::LDRXui;
643     }
644     break;
645   case AArch64::FPRRegBankID:
646     switch (OpSize) {
647     case 8:
648       return isStore ? AArch64::STRBui : AArch64::LDRBui;
649     case 16:
650       return isStore ? AArch64::STRHui : AArch64::LDRHui;
651     case 32:
652       return isStore ? AArch64::STRSui : AArch64::LDRSui;
653     case 64:
654       return isStore ? AArch64::STRDui : AArch64::LDRDui;
655     }
656     break;
657   }
658   return GenericOpc;
659 }
660 
661 #ifndef NDEBUG
662 /// Helper function that verifies that we have a valid copy at the end of
663 /// selectCopy. Verifies that the source and dest have the expected sizes and
664 /// then returns true.
665 static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
666                         const MachineRegisterInfo &MRI,
667                         const TargetRegisterInfo &TRI,
668                         const RegisterBankInfo &RBI) {
669   const Register DstReg = I.getOperand(0).getReg();
670   const Register SrcReg = I.getOperand(1).getReg();
671   const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
672   const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
673 
674   // Make sure the size of the source and dest line up.
675   assert(
676       (DstSize == SrcSize ||
677        // Copies are a means to set up initial types; the number of
678        // bits may not exactly match.
679        (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
680        // Copies are a means to copy bits around; as long as we are
681        // on the same register class, that's fine. Otherwise, that
682        // means we need some SUBREG_TO_REG or AND & co.
683        (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
684       "Copy with different width?!");
685 
686   // Check the size of the destination.
687   assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
688          "GPRs cannot get more than 64-bit width values");
689 
690   return true;
691 }
692 #endif
693 
694 /// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
695 /// to \p *To.
696 ///
697 /// E.g "To = COPY SrcReg:SubReg"
698 static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
699                        const RegisterBankInfo &RBI, Register SrcReg,
700                        const TargetRegisterClass *To, unsigned SubReg) {
701   assert(SrcReg.isValid() && "Expected a valid source register?");
702   assert(To && "Destination register class cannot be null");
703   assert(SubReg && "Expected a valid subregister");
704 
705   MachineIRBuilder MIB(I);
706   auto SubRegCopy =
707       MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
708   MachineOperand &RegOp = I.getOperand(1);
709   RegOp.setReg(SubRegCopy.getReg(0));
710 
711   // It's possible that the destination register won't be constrained. Make
712   // sure that happens.
713   if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
714     RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
715 
716   return true;
717 }
718 
719 /// Helper function to get the source and destination register classes for a
720 /// copy. Returns a std::pair containing the source register class for the
721 /// copy, and the destination register class for the copy. If a register class
722 /// cannot be determined, then it will be nullptr.
723 static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
724 getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
725                      MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
726                      const RegisterBankInfo &RBI) {
727   Register DstReg = I.getOperand(0).getReg();
728   Register SrcReg = I.getOperand(1).getReg();
729   const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
730   const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
731   unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
732   unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
733 
734   // Special casing for cross-bank copies of s1s. We can technically represent
735   // a 1-bit value with any size of register. The minimum size for a GPR is 32
736   // bits. So, we need to put the FPR on 32 bits as well.
737   //
738   // FIXME: I'm not sure if this case holds true outside of copies. If it does,
739   // then we can pull it into the helpers that get the appropriate class for a
740   // register bank. Or make a new helper that carries along some constraint
741   // information.
742   if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
743     SrcSize = DstSize = 32;
744 
745   return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
746           getMinClassForRegBank(DstRegBank, DstSize, true)};
747 }
748 
749 static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
750                        MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
751                        const RegisterBankInfo &RBI) {
752   Register DstReg = I.getOperand(0).getReg();
753   Register SrcReg = I.getOperand(1).getReg();
754   const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
755   const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
756 
757   // Find the correct register classes for the source and destination registers.
758   const TargetRegisterClass *SrcRC;
759   const TargetRegisterClass *DstRC;
760   std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
761 
762   if (!DstRC) {
763     LLVM_DEBUG(dbgs() << "Unexpected dest size "
764                       << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
765     return false;
766   }
767 
768   // A couple helpers below, for making sure that the copy we produce is valid.
769 
770   // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
771   // to verify that the src and dst are the same size, since that's handled by
772   // the SUBREG_TO_REG.
773   bool KnownValid = false;
774 
775   // Returns true, or asserts if something we don't expect happens. Instead of
776   // returning true, we return isValidCopy() to ensure that we verify the
777   // result.
778   auto CheckCopy = [&]() {
779     // If we have a bitcast or something, we can't have physical registers.
780     assert((I.isCopy() ||
781             (!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
782              !Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
783            "No phys reg on generic operator!");
784     bool ValidCopy = true;
785 #ifndef NDEBUG
786     ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI);
787     assert(ValidCopy && "Invalid copy.");
788 #endif
789     return ValidCopy;
790   };
791 
792   // Is this a copy? If so, then we may need to insert a subregister copy.
793   if (I.isCopy()) {
794     // Yes. Check if there's anything to fix up.
795     if (!SrcRC) {
796       LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
797       return false;
798     }
799 
800     unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
801     unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
802     unsigned SubReg;
803 
804     // If the source bank doesn't support a subregister copy small enough,
805     // then we first need to copy to the destination bank.
806     if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
807       const TargetRegisterClass *DstTempRC =
808           getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
809       getSubRegForClass(DstRC, TRI, SubReg);
810 
811       MachineIRBuilder MIB(I);
812       auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
813       copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
814     } else if (SrcSize > DstSize) {
815       // If the source register is bigger than the destination we need to
816       // perform a subregister copy.
817       const TargetRegisterClass *SubRegRC =
818           getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
819       getSubRegForClass(SubRegRC, TRI, SubReg);
820       copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
821     } else if (DstSize > SrcSize) {
822       // If the destination register is bigger than the source we need to do
823       // a promotion using SUBREG_TO_REG.
824       const TargetRegisterClass *PromotionRC =
825           getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
826       getSubRegForClass(SrcRC, TRI, SubReg);
827 
828       Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
829       BuildMI(*I.getParent(), I, I.getDebugLoc(),
830               TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
831           .addImm(0)
832           .addUse(SrcReg)
833           .addImm(SubReg);
834       MachineOperand &RegOp = I.getOperand(1);
835       RegOp.setReg(PromoteReg);
836 
837       // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
838       KnownValid = true;
839     }
840 
841     // If the destination is a physical register, then there's nothing to
842     // change, so we're done.
843     if (Register::isPhysicalRegister(DstReg))
844       return CheckCopy();
845   }
846 
847   // No need to constrain SrcReg. It will get constrained when we hit another
848   // of its uses or defs. Copies do not have constraints.
849   if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
850     LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
851                       << " operand\n");
852     return false;
853   }
854   I.setDesc(TII.get(AArch64::COPY));
855   return CheckCopy();
856 }
857 
858 static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
859   if (!DstTy.isScalar() || !SrcTy.isScalar())
860     return GenericOpc;
861 
862   const unsigned DstSize = DstTy.getSizeInBits();
863   const unsigned SrcSize = SrcTy.getSizeInBits();
864 
865   switch (DstSize) {
866   case 32:
867     switch (SrcSize) {
868     case 32:
869       switch (GenericOpc) {
870       case TargetOpcode::G_SITOFP:
871         return AArch64::SCVTFUWSri;
872       case TargetOpcode::G_UITOFP:
873         return AArch64::UCVTFUWSri;
874       case TargetOpcode::G_FPTOSI:
875         return AArch64::FCVTZSUWSr;
876       case TargetOpcode::G_FPTOUI:
877         return AArch64::FCVTZUUWSr;
878       default:
879         return GenericOpc;
880       }
881     case 64:
882       switch (GenericOpc) {
883       case TargetOpcode::G_SITOFP:
884         return AArch64::SCVTFUXSri;
885       case TargetOpcode::G_UITOFP:
886         return AArch64::UCVTFUXSri;
887       case TargetOpcode::G_FPTOSI:
888         return AArch64::FCVTZSUWDr;
889       case TargetOpcode::G_FPTOUI:
890         return AArch64::FCVTZUUWDr;
891       default:
892         return GenericOpc;
893       }
894     default:
895       return GenericOpc;
896     }
897   case 64:
898     switch (SrcSize) {
899     case 32:
900       switch (GenericOpc) {
901       case TargetOpcode::G_SITOFP:
902         return AArch64::SCVTFUWDri;
903       case TargetOpcode::G_UITOFP:
904         return AArch64::UCVTFUWDri;
905       case TargetOpcode::G_FPTOSI:
906         return AArch64::FCVTZSUXSr;
907       case TargetOpcode::G_FPTOUI:
908         return AArch64::FCVTZUUXSr;
909       default:
910         return GenericOpc;
911       }
912     case 64:
913       switch (GenericOpc) {
914       case TargetOpcode::G_SITOFP:
915         return AArch64::SCVTFUXDri;
916       case TargetOpcode::G_UITOFP:
917         return AArch64::UCVTFUXDri;
918       case TargetOpcode::G_FPTOSI:
919         return AArch64::FCVTZSUXDr;
920       case TargetOpcode::G_FPTOUI:
921         return AArch64::FCVTZUUXDr;
922       default:
923         return GenericOpc;
924       }
925     default:
926       return GenericOpc;
927     }
928   default:
929     return GenericOpc;
930   };
931   return GenericOpc;
932 }
933 
934 static unsigned selectSelectOpc(MachineInstr &I, MachineRegisterInfo &MRI,
935                                 const RegisterBankInfo &RBI) {
936   const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
937   bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
938                AArch64::GPRRegBankID);
939   LLT Ty = MRI.getType(I.getOperand(0).getReg());
940   if (Ty == LLT::scalar(32))
941     return IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr;
942   else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64))
943     return IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr;
944   return 0;
945 }
946 
947 /// Helper function to select the opcode for a G_FCMP.
948 static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) {
949   // If this is a compare against +0.0, then we don't have to explicitly
950   // materialize a constant.
951   const ConstantFP *FPImm = getConstantFPVRegVal(I.getOperand(3).getReg(), MRI);
952   bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
953   unsigned OpSize = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
954   if (OpSize != 32 && OpSize != 64)
955     return 0;
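  // Indexed as [ShouldUseImm][Is64Bit]; e.g. a 64-bit compare against +0.0
  // selects FCMPDri.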
956   unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
957                               {AArch64::FCMPSri, AArch64::FCMPDri}};
958   return CmpOpcTbl[ShouldUseImm][OpSize == 64];
959 }
960 
961 /// Returns true if \p P is an unsigned integer comparison predicate.
962 static bool isUnsignedICMPPred(const CmpInst::Predicate P) {
963   switch (P) {
964   default:
965     return false;
966   case CmpInst::ICMP_UGT:
967   case CmpInst::ICMP_UGE:
968   case CmpInst::ICMP_ULT:
969   case CmpInst::ICMP_ULE:
970     return true;
971   }
972 }
973 
974 static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
975   switch (P) {
976   default:
977     llvm_unreachable("Unknown condition code!");
978   case CmpInst::ICMP_NE:
979     return AArch64CC::NE;
980   case CmpInst::ICMP_EQ:
981     return AArch64CC::EQ;
982   case CmpInst::ICMP_SGT:
983     return AArch64CC::GT;
984   case CmpInst::ICMP_SGE:
985     return AArch64CC::GE;
986   case CmpInst::ICMP_SLT:
987     return AArch64CC::LT;
988   case CmpInst::ICMP_SLE:
989     return AArch64CC::LE;
990   case CmpInst::ICMP_UGT:
991     return AArch64CC::HI;
992   case CmpInst::ICMP_UGE:
993     return AArch64CC::HS;
994   case CmpInst::ICMP_ULT:
995     return AArch64CC::LO;
996   case CmpInst::ICMP_ULE:
997     return AArch64CC::LS;
998   }
999 }
1000 
1001 static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
1002                                       AArch64CC::CondCode &CondCode,
1003                                       AArch64CC::CondCode &CondCode2) {
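  // Some FP predicates need two AArch64 conditions: e.g. FCMP_ONE is handled
  // as MI or GT, and FCMP_UEQ as EQ or VS. CondCode2 stays AL when a single
  // condition suffices.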
1004   CondCode2 = AArch64CC::AL;
1005   switch (P) {
1006   default:
1007     llvm_unreachable("Unknown FP condition!");
1008   case CmpInst::FCMP_OEQ:
1009     CondCode = AArch64CC::EQ;
1010     break;
1011   case CmpInst::FCMP_OGT:
1012     CondCode = AArch64CC::GT;
1013     break;
1014   case CmpInst::FCMP_OGE:
1015     CondCode = AArch64CC::GE;
1016     break;
1017   case CmpInst::FCMP_OLT:
1018     CondCode = AArch64CC::MI;
1019     break;
1020   case CmpInst::FCMP_OLE:
1021     CondCode = AArch64CC::LS;
1022     break;
1023   case CmpInst::FCMP_ONE:
1024     CondCode = AArch64CC::MI;
1025     CondCode2 = AArch64CC::GT;
1026     break;
1027   case CmpInst::FCMP_ORD:
1028     CondCode = AArch64CC::VC;
1029     break;
1030   case CmpInst::FCMP_UNO:
1031     CondCode = AArch64CC::VS;
1032     break;
1033   case CmpInst::FCMP_UEQ:
1034     CondCode = AArch64CC::EQ;
1035     CondCode2 = AArch64CC::VS;
1036     break;
1037   case CmpInst::FCMP_UGT:
1038     CondCode = AArch64CC::HI;
1039     break;
1040   case CmpInst::FCMP_UGE:
1041     CondCode = AArch64CC::PL;
1042     break;
1043   case CmpInst::FCMP_ULT:
1044     CondCode = AArch64CC::LT;
1045     break;
1046   case CmpInst::FCMP_ULE:
1047     CondCode = AArch64CC::LE;
1048     break;
1049   case CmpInst::FCMP_UNE:
1050     CondCode = AArch64CC::NE;
1051     break;
1052   }
1053 }
1054 
1055 /// Return a register which can be used as a bit to test in a TB(N)Z.
1056 static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1057                               MachineRegisterInfo &MRI) {
1058   assert(Reg.isValid() && "Expected valid register!");
1059   while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1060     unsigned Opc = MI->getOpcode();
1061 
1062     if (!MI->getOperand(0).isReg() ||
1063         !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1064       break;
1065 
1066     // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1067     //
1068     // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1069     // on the truncated x is the same as the bit number on x.
1070     if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1071         Opc == TargetOpcode::G_TRUNC) {
1072       Register NextReg = MI->getOperand(1).getReg();
1073       // Did we find something worth folding?
1074       if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1075         break;
1076 
1077       // NextReg is worth folding. Keep looking.
1078       Reg = NextReg;
1079       continue;
1080     }
1081 
1082     // Attempt to find a suitable operation with a constant on one side.
1083     Optional<uint64_t> C;
1084     Register TestReg;
1085     switch (Opc) {
1086     default:
1087       break;
1088     case TargetOpcode::G_AND:
1089     case TargetOpcode::G_XOR: {
1090       TestReg = MI->getOperand(1).getReg();
1091       Register ConstantReg = MI->getOperand(2).getReg();
1092       auto VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
1093       if (!VRegAndVal) {
1094         // AND commutes, check the other side for a constant.
1095         // FIXME: Can we canonicalize the constant so that it's always on the
1096         // same side at some point earlier?
1097         std::swap(ConstantReg, TestReg);
1098         VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
1099       }
1100       if (VRegAndVal)
1101         C = VRegAndVal->Value;
1102       break;
1103     }
1104     case TargetOpcode::G_ASHR:
1105     case TargetOpcode::G_LSHR:
1106     case TargetOpcode::G_SHL: {
1107       TestReg = MI->getOperand(1).getReg();
1108       auto VRegAndVal =
1109           getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1110       if (VRegAndVal)
1111         C = VRegAndVal->Value;
1112       break;
1113     }
1114     }
1115 
1116     // Didn't find a constant or viable register. Bail out of the loop.
1117     if (!C || !TestReg.isValid())
1118       break;
1119 
1120     // We found a suitable instruction with a constant. Check to see if we can
1121     // walk through the instruction.
1122     Register NextReg;
1123     unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1124     switch (Opc) {
1125     default:
1126       break;
1127     case TargetOpcode::G_AND:
1128       // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1129       if ((*C >> Bit) & 1)
1130         NextReg = TestReg;
1131       break;
1132     case TargetOpcode::G_SHL:
1133       // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1134       // the type of the register.
1135       if (*C <= Bit && (Bit - *C) < TestRegSize) {
1136         NextReg = TestReg;
1137         Bit = Bit - *C;
1138       }
1139       break;
1140     case TargetOpcode::G_ASHR:
1141       // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1142       // in x
1143       NextReg = TestReg;
1144       Bit = Bit + *C;
1145       if (Bit >= TestRegSize)
1146         Bit = TestRegSize - 1;
1147       break;
1148     case TargetOpcode::G_LSHR:
1149       // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1150       if ((Bit + *C) < TestRegSize) {
1151         NextReg = TestReg;
1152         Bit = Bit + *C;
1153       }
1154       break;
1155     case TargetOpcode::G_XOR:
1156       // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1157       // appropriate.
1158       //
1159       // e.g. If x' = xor x, c, and the b-th bit is set in c then
1160       //
1161       // tbz x', b -> tbnz x, b
1162       //
1163       // Because x' only has the b-th bit set if x does not.
1164       if ((*C >> Bit) & 1)
1165         Invert = !Invert;
1166       NextReg = TestReg;
1167       break;
1168     }
1169 
1170     // Check if we found anything worth folding.
1171     if (!NextReg.isValid())
1172       return Reg;
1173     Reg = NextReg;
1174   }
1175 
1176   return Reg;
1177 }
1178 
1179 MachineInstr *AArch64InstructionSelector::emitTestBit(
1180     Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1181     MachineIRBuilder &MIB) const {
1182   assert(TestReg.isValid());
1183   assert(ProduceNonFlagSettingCondBr &&
1184          "Cannot emit TB(N)Z with speculation tracking!");
1185   MachineRegisterInfo &MRI = *MIB.getMRI();
1186 
1187   // Attempt to optimize the test bit by walking over instructions.
1188   TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1189   LLT Ty = MRI.getType(TestReg);
1190   unsigned Size = Ty.getSizeInBits();
1191   assert(!Ty.isVector() && "Expected a scalar!");
1192   assert(Bit < 64 && "Bit is too large!");
1193 
1194   // When the test register is a 64-bit register, we have to narrow to make
1195   // TBNZW work.
1196   bool UseWReg = Bit < 32;
1197   unsigned NecessarySize = UseWReg ? 32 : 64;
1198   if (Size < NecessarySize)
1199     TestReg = widenGPRBankRegIfNeeded(TestReg, NecessarySize, MIB);
1200   else if (Size > NecessarySize)
1201     TestReg = narrowExtendRegIfNeeded(TestReg, MIB);
1202 
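  // Opcode is chosen as [UseWReg][IsNegative]; e.g. a "not zero" test of a
  // bit below 32 selects TBNZW.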
1203   static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1204                                           {AArch64::TBZW, AArch64::TBNZW}};
1205   unsigned Opc = OpcTable[UseWReg][IsNegative];
1206   auto TestBitMI =
1207       MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1208   constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1209   return &*TestBitMI;
1210 }
1211 
1212 bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1213     MachineInstr *AndInst, int64_t CmpConstant, const CmpInst::Predicate &Pred,
1214     MachineBasicBlock *DstMBB, MachineIRBuilder &MIB) const {
1215   // Given something like this:
1216   //
1217   //  %x = ...Something...
1218   //  %one = G_CONSTANT i64 1
1219   //  %zero = G_CONSTANT i64 0
1220   //  %and = G_AND %x, %one
1221   //  %cmp = G_ICMP intpred(ne), %and, %zero
1222   //  %cmp_trunc = G_TRUNC %cmp
1223   //  G_BRCOND %cmp_trunc, %bb.3
1224   //
1225   // We want to try and fold the AND into the G_BRCOND and produce either a
1226   // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1227   //
1228   // In this case, we'd get
1229   //
1230   // TBNZ %x %bb.3
1231   //
1232   if (!AndInst || AndInst->getOpcode() != TargetOpcode::G_AND)
1233     return false;
1234 
1235   // Need to be comparing against 0 to fold.
1236   if (CmpConstant != 0)
1237     return false;
1238 
1239   MachineRegisterInfo &MRI = *MIB.getMRI();
1240 
1241   // Only support EQ and NE. If we have LT, then it *is* possible to fold, but
1242   // we don't want to do this. When we have an AND and LT, we need a TST/ANDS,
1243   // so folding would be redundant.
1244   if (Pred != CmpInst::Predicate::ICMP_EQ &&
1245       Pred != CmpInst::Predicate::ICMP_NE)
1246     return false;
1247 
1248   // Check if the AND has a constant on its RHS which we can use as a mask.
1249   // If it's a power of 2, then it's the same as checking a specific bit.
1250   // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1251   auto MaybeBit =
1252       getConstantVRegValWithLookThrough(AndInst->getOperand(2).getReg(), MRI);
1253   if (!MaybeBit || !isPowerOf2_64(MaybeBit->Value))
1254     return false;
1255 
1256   uint64_t Bit = Log2_64(static_cast<uint64_t>(MaybeBit->Value));
1257   Register TestReg = AndInst->getOperand(1).getReg();
1258   bool Invert = Pred == CmpInst::Predicate::ICMP_NE;
1259 
1260   // Emit a TB(N)Z.
1261   emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1262   return true;
1263 }
1264 
1265 bool AArch64InstructionSelector::selectCompareBranch(
1266     MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1267 
1268   const Register CondReg = I.getOperand(0).getReg();
1269   MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1270   MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1271   if (CCMI->getOpcode() == TargetOpcode::G_TRUNC)
1272     CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg());
1273   if (CCMI->getOpcode() != TargetOpcode::G_ICMP)
1274     return false;
1275 
1276   Register LHS = CCMI->getOperand(2).getReg();
1277   Register RHS = CCMI->getOperand(3).getReg();
1278   auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
1279   MachineIRBuilder MIB(I);
1280   CmpInst::Predicate Pred =
1281       (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
1282   MachineInstr *LHSMI = getDefIgnoringCopies(LHS, MRI);
1283 
1284   // When we can emit a TB(N)Z, prefer that.
1285   //
1286   // Handle non-commutative condition codes first.
1287   // Note that we don't want to do this when we have a G_AND because it can
1288   // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1289   if (VRegAndVal && LHSMI->getOpcode() != TargetOpcode::G_AND) {
1290     int64_t C = VRegAndVal->Value;
1291 
1292     // When we have a greater-than comparison against -1, we can just test
1293     // whether the msb is zero: x s> -1 iff x s>= 0 iff the sign bit is clear.
1294     if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1295       uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1296       emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1297       I.eraseFromParent();
1298       return true;
1299     }
1300 
1301     // When we have a less-than-zero comparison, we can just test whether the
1302     // msb is not zero: x s< 0 iff the sign bit is set.
1303     if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1304       uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1305       emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1306       I.eraseFromParent();
1307       return true;
1308     }
1309   }
1310 
1311   if (!VRegAndVal) {
1312     std::swap(RHS, LHS);
1313     VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
1314     LHSMI = getDefIgnoringCopies(LHS, MRI);
1315   }
1316 
1317   if (!VRegAndVal || VRegAndVal->Value != 0) {
1318     // If we can't select a CBZ then emit a cmp + Bcc.
1319     MachineInstr *Cmp;
1320     std::tie(Cmp, Pred) = emitIntegerCompare(
1321         CCMI->getOperand(2), CCMI->getOperand(3), CCMI->getOperand(1), MIB);
1322     if (!Cmp)
1323       return false;
1324     const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(Pred);
1325     MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1326     I.eraseFromParent();
1327     return true;
1328   }
1329 
1330   // Try to emit a TB(N)Z for an eq or ne condition.
1331   if (tryOptAndIntoCompareBranch(LHSMI, VRegAndVal->Value, Pred, DestMBB,
1332                                  MIB)) {
1333     I.eraseFromParent();
1334     return true;
1335   }
1336 
1337   const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
1338   if (RB.getID() != AArch64::GPRRegBankID)
1339     return false;
1340   if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
1341     return false;
1342 
1343   const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
1344   unsigned CBOpc = 0;
1345   if (CmpWidth <= 32)
1346     CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
1347   else if (CmpWidth == 64)
1348     CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
1349   else
1350     return false;
1351 
1352   BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
1353       .addUse(LHS)
1354       .addMBB(DestMBB)
1355       .constrainAllUses(TII, TRI, RBI);
1356 
1357   I.eraseFromParent();
1358   return true;
1359 }
1360 
1361 /// Returns the element immediate value of a vector shift operand if found.
1362 /// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1363 static Optional<int64_t> getVectorShiftImm(Register Reg,
1364                                            MachineRegisterInfo &MRI) {
1365   assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1366   MachineInstr *OpMI = MRI.getVRegDef(Reg);
1367   assert(OpMI && "Expected to find a vreg def for vector shift operand");
1368   if (OpMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR)
1369     return None;
1370 
1371   // Check all operands are identical immediates.
1372   int64_t ImmVal = 0;
1373   for (unsigned Idx = 1; Idx < OpMI->getNumOperands(); ++Idx) {
1374     auto VRegAndVal = getConstantVRegValWithLookThrough(OpMI->getOperand(Idx).getReg(), MRI);
1375     if (!VRegAndVal)
1376       return None;
1377 
1378     if (Idx == 1)
1379       ImmVal = VRegAndVal->Value;
1380     if (ImmVal != VRegAndVal->Value)
1381       return None;
1382   }
1383 
1384   return ImmVal;
1385 }
1386 
1387 /// Matches and returns the shift immediate value for a SHL instruction given
1388 /// a shift operand.
1389 static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) {
1390   Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1391   if (!ShiftImm)
1392     return None;
1393   // Check the immediate is in range for a SHL.
1394   int64_t Imm = *ShiftImm;
1395   if (Imm < 0)
1396     return None;
1397   switch (SrcTy.getElementType().getSizeInBits()) {
1398   default:
1399     LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1400     return None;
1401   case 8:
1402     if (Imm > 7)
1403       return None;
1404     break;
1405   case 16:
1406     if (Imm > 15)
1407       return None;
1408     break;
1409   case 32:
1410     if (Imm > 31)
1411       return None;
1412     break;
1413   case 64:
1414     if (Imm > 63)
1415       return None;
1416     break;
1417   }
1418   return Imm;
1419 }
1420 
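/// Select a vector G_SHL. If the shift amount is a splat of in-range
/// constants, the immediate form (SHL) is used; otherwise the register form
/// (USHL) is emitted.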
1421 bool AArch64InstructionSelector::selectVectorSHL(
1422     MachineInstr &I, MachineRegisterInfo &MRI) const {
1423   assert(I.getOpcode() == TargetOpcode::G_SHL);
1424   Register DstReg = I.getOperand(0).getReg();
1425   const LLT Ty = MRI.getType(DstReg);
1426   Register Src1Reg = I.getOperand(1).getReg();
1427   Register Src2Reg = I.getOperand(2).getReg();
1428 
1429   if (!Ty.isVector())
1430     return false;
1431 
1432   // Check if we have a vector of constants on RHS that we can select as the
1433   // immediate form.
1434   Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1435 
1436   unsigned Opc = 0;
1437   if (Ty == LLT::vector(2, 64)) {
1438     Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1439   } else if (Ty == LLT::vector(4, 32)) {
1440     Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1441   } else if (Ty == LLT::vector(2, 32)) {
1442     Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1443   } else {
1444     LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1445     return false;
1446   }
1447 
1448   MachineIRBuilder MIB(I);
1449   auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1450   if (ImmVal)
1451     Shl.addImm(*ImmVal);
1452   else
1453     Shl.addUse(Src2Reg);
1454   constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1455   I.eraseFromParent();
1456   return true;
1457 }
1458 
1459 bool AArch64InstructionSelector::selectVectorASHR(
1460     MachineInstr &I, MachineRegisterInfo &MRI) const {
1461   assert(I.getOpcode() == TargetOpcode::G_ASHR);
1462   Register DstReg = I.getOperand(0).getReg();
1463   const LLT Ty = MRI.getType(DstReg);
1464   Register Src1Reg = I.getOperand(1).getReg();
1465   Register Src2Reg = I.getOperand(2).getReg();
1466 
1467   if (!Ty.isVector())
1468     return false;
1469 
1470   // There is no vector shift-right-by-register instruction, but the vector
1471   // shift-left-by-register instruction takes a signed shift amount, where
1472   // negative values specify a right shift.
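  // So a G_ASHR is selected by negating the shift amount with a NEG and
  // feeding the result to the signed shift-left-by-register instruction
  // (SSHL).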
1473 
1474   unsigned Opc = 0;
1475   unsigned NegOpc = 0;
1476   const TargetRegisterClass *RC = nullptr;
1477   if (Ty == LLT::vector(2, 64)) {
1478     Opc = AArch64::SSHLv2i64;
1479     NegOpc = AArch64::NEGv2i64;
1480     RC = &AArch64::FPR128RegClass;
1481   } else if (Ty == LLT::vector(4, 32)) {
1482     Opc = AArch64::SSHLv4i32;
1483     NegOpc = AArch64::NEGv4i32;
1484     RC = &AArch64::FPR128RegClass;
1485   } else if (Ty == LLT::vector(2, 32)) {
1486     Opc = AArch64::SSHLv2i32;
1487     NegOpc = AArch64::NEGv2i32;
1488     RC = &AArch64::FPR64RegClass;
1489   } else {
1490     LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1491     return false;
1492   }
1493 
1494   MachineIRBuilder MIB(I);
1495   auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1496   constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1497   auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1498   constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1499   I.eraseFromParent();
1500   return true;
1501 }
1502 
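/// G_VASTART lowering for the generic AAPCS ABI is not implemented yet; this
/// always returns false.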
1503 bool AArch64InstructionSelector::selectVaStartAAPCS(
1504     MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1505   return false;
1506 }
1507 
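/// Select G_VASTART for Darwin: compute the address of the variadic argument
/// stack area from its frame index and store it to the va_list pointer.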
1508 bool AArch64InstructionSelector::selectVaStartDarwin(
1509     MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1510   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1511   Register ListReg = I.getOperand(0).getReg();
1512 
1513   Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1514 
1515   auto MIB =
1516       BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
1517           .addDef(ArgsAddrReg)
1518           .addFrameIndex(FuncInfo->getVarArgsStackIndex())
1519           .addImm(0)
1520           .addImm(0);
1521 
1522   constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1523 
1524   MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
1525             .addUse(ArgsAddrReg)
1526             .addUse(ListReg)
1527             .addImm(0)
1528             .addMemOperand(*I.memoperands_begin());
1529 
1530   constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1531   I.eraseFromParent();
1532   return true;
1533 }
1534 
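/// Materialize a GlobalValue or BlockAddress for the large code model using
/// a MOVZ of bits 0-15 followed by MOVKs of bits 16-31, 32-47 and 48-63.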
1535 void AArch64InstructionSelector::materializeLargeCMVal(
1536     MachineInstr &I, const Value *V, unsigned OpFlags) const {
1537   MachineBasicBlock &MBB = *I.getParent();
1538   MachineFunction &MF = *MBB.getParent();
1539   MachineRegisterInfo &MRI = MF.getRegInfo();
1540   MachineIRBuilder MIB(I);
1541 
1542   auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
1543   MovZ->addOperand(MF, I.getOperand(1));
1544   MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
1545                                      AArch64II::MO_NC);
1546   MovZ->addOperand(MF, MachineOperand::CreateImm(0));
1547   constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
1548 
1549   auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
1550                        Register ForceDstReg) {
1551     Register DstReg = ForceDstReg
1552                           ? ForceDstReg
1553                           : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1554     auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
1555     if (auto *GV = dyn_cast<GlobalValue>(V)) {
1556       MovI->addOperand(MF, MachineOperand::CreateGA(
1557                                GV, MovZ->getOperand(1).getOffset(), Flags));
1558     } else {
1559       MovI->addOperand(
1560           MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
1561                                        MovZ->getOperand(1).getOffset(), Flags));
1562     }
1563     MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
1564     constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
1565     return DstReg;
1566   };
1567   Register DstReg = BuildMovK(MovZ.getReg(0),
1568                               AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
1569   DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
1570   BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
1571   return;
1572 }
1573 
1574 bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
1575   MachineBasicBlock &MBB = *I.getParent();
1576   MachineFunction &MF = *MBB.getParent();
1577   MachineRegisterInfo &MRI = MF.getRegInfo();
1578 
1579   switch (I.getOpcode()) {
1580   case TargetOpcode::G_SHL:
1581   case TargetOpcode::G_ASHR:
1582   case TargetOpcode::G_LSHR: {
1583     // These shifts are legalized to have 64 bit shift amounts because we want
1584     // to take advantage of the existing imported selection patterns that assume
1585     // the immediates are s64s. However, if the shifted type is 32 bits and for
1586     // some reason we receive input GMIR that has an s64 shift amount that's not
1587     // a G_CONSTANT, insert a truncate so that we can still select the s32
1588     // register-register variant.
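    // For example, %sh:_(s32) = G_SHL %x:_(s32), %amt:_(s64) has its shift
    // amount operand replaced by a sub_32 COPY of %amt so the 32-bit
    // register-register pattern can still be matched.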
1589     Register SrcReg = I.getOperand(1).getReg();
1590     Register ShiftReg = I.getOperand(2).getReg();
1591     const LLT ShiftTy = MRI.getType(ShiftReg);
1592     const LLT SrcTy = MRI.getType(SrcReg);
1593     if (SrcTy.isVector())
1594       return false;
1595     assert(!ShiftTy.isVector() && "unexpected vector shift ty");
1596     if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
1597       return false;
1598     auto *AmtMI = MRI.getVRegDef(ShiftReg);
1599     assert(AmtMI && "could not find a vreg definition for shift amount");
1600     if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
1601       // Insert a subregister copy to implement a 64->32 trunc
1602       MachineIRBuilder MIB(I);
1603       auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
1604                        .addReg(ShiftReg, 0, AArch64::sub_32);
1605       MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1606       I.getOperand(2).setReg(Trunc.getReg(0));
1607     }
1608     return true;
1609   }
1610   case TargetOpcode::G_STORE:
1611     return contractCrossBankCopyIntoStore(I, MRI);
1612   case TargetOpcode::G_PTR_ADD:
1613     return convertPtrAddToAdd(I, MRI);
1614   case TargetOpcode::G_LOAD: {
1615     // For scalar loads of pointers, we try to convert the dest type from p0
1616     // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
1617     // conversion, this should be ok because all users should have been
1618     // selected already, so the type doesn't matter for them.
1619     Register DstReg = I.getOperand(0).getReg();
1620     const LLT DstTy = MRI.getType(DstReg);
1621     if (!DstTy.isPointer())
1622       return false;
1623     MRI.setType(DstReg, LLT::scalar(64));
1624     return true;
1625   }
1626   default:
1627     return false;
1628   }
1629 }
1630 
1631 /// This lowering tries to look for G_PTR_ADD instructions and then converts
1632 /// them to a standard G_ADD with a COPY on the source.
1633 ///
1634 /// The motivation behind this is to expose the add semantics to the imported
1635 /// tablegen patterns. We shouldn't need to check for uses being loads/stores,
1636 /// because the selector works bottom up, uses before defs. By the time we
1637 /// end up trying to select a G_PTR_ADD, we should have already attempted to
1638 /// fold this into addressing modes and were therefore unsuccessful.
1639 bool AArch64InstructionSelector::convertPtrAddToAdd(
1640     MachineInstr &I, MachineRegisterInfo &MRI) {
1641   assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1642   Register DstReg = I.getOperand(0).getReg();
1643   Register AddOp1Reg = I.getOperand(1).getReg();
1644   const LLT PtrTy = MRI.getType(DstReg);
1645   if (PtrTy.getAddressSpace() != 0)
1646     return false;
1647 
1648   MachineIRBuilder MIB(I);
1649   const LLT CastPtrTy = PtrTy.isVector() ? LLT::vector(2, 64) : LLT::scalar(64);
1650   auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
1651   // Set regbanks on the registers.
1652   if (PtrTy.isVector())
1653     MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
1654   else
1655     MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1656 
1657   // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
1658   // %dst(intty) = G_ADD %intbase, off
1659   I.setDesc(TII.get(TargetOpcode::G_ADD));
1660   MRI.setType(DstReg, CastPtrTy);
1661   I.getOperand(1).setReg(PtrToInt.getReg(0));
1662   if (!select(*PtrToInt)) {
1663     LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
1664     return false;
1665   }
1666   return true;
1667 }
1668 
1669 bool AArch64InstructionSelector::earlySelectSHL(
1670     MachineInstr &I, MachineRegisterInfo &MRI) const {
1671   // We try to match the immediate variant of LSL, which is actually an alias
1672   // for a special case of UBFM. Otherwise, we fall back to the imported
1673   // selector which will match the register variant.
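  // For example, a 32-bit shift left by 3 is selected as
  //   UBFMWri %dst, %src, 29, 28
  // since LSL #imm is an alias of UBFM with immr = (-imm mod 32) and
  // imms = 31 - imm.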
1674   assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
1675   const auto &MO = I.getOperand(2);
1676   auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI);
1677   if (!VRegAndVal)
1678     return false;
1679 
1680   const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1681   if (DstTy.isVector())
1682     return false;
1683   bool Is64Bit = DstTy.getSizeInBits() == 64;
1684   auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
1685   auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
1686   MachineIRBuilder MIB(I);
1687 
1688   if (!Imm1Fn || !Imm2Fn)
1689     return false;
1690 
1691   auto NewI =
1692       MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
1693                      {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
1694 
1695   for (auto &RenderFn : *Imm1Fn)
1696     RenderFn(NewI);
1697   for (auto &RenderFn : *Imm2Fn)
1698     RenderFn(NewI);
1699 
1700   I.eraseFromParent();
1701   return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
1702 }
1703 
1704 bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
1705     MachineInstr &I, MachineRegisterInfo &MRI) {
1706   assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
1707   // If we're storing a scalar, it doesn't matter what register bank that
1708   // scalar is on. All that matters is the size.
1709   //
1710   // So, if we see something like this (with a 32-bit scalar as an example):
1711   //
1712   // %x:gpr(s32) = ... something ...
1713   // %y:fpr(s32) = COPY %x:gpr(s32)
1714   // G_STORE %y:fpr(s32)
1715   //
1716   // We can fix this up into something like this:
1717   //
1718   // G_STORE %x:gpr(s32)
1719   //
1720   // And then continue the selection process normally.
1721   Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
1722   if (!DefDstReg.isValid())
1723     return false;
1724   LLT DefDstTy = MRI.getType(DefDstReg);
1725   Register StoreSrcReg = I.getOperand(0).getReg();
1726   LLT StoreSrcTy = MRI.getType(StoreSrcReg);
1727 
1728   // If we get something strange like a physical register, then we shouldn't
1729   // go any further.
1730   if (!DefDstTy.isValid())
1731     return false;
1732 
1733   // Are the source and dst types the same size?
1734   if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
1735     return false;
1736 
1737   if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
1738       RBI.getRegBank(DefDstReg, MRI, TRI))
1739     return false;
1740 
1741   // We have a cross-bank copy, which is entering a store. Let's fold it.
1742   I.getOperand(0).setReg(DefDstReg);
1743   return true;
1744 }
1745 
1746 bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
1747   assert(I.getParent() && "Instruction should be in a basic block!");
1748   assert(I.getParent()->getParent() && "Instruction should be in a function!");
1749 
1750   MachineBasicBlock &MBB = *I.getParent();
1751   MachineFunction &MF = *MBB.getParent();
1752   MachineRegisterInfo &MRI = MF.getRegInfo();
1753 
1754   switch (I.getOpcode()) {
1755   case TargetOpcode::G_SHL:
1756     return earlySelectSHL(I, MRI);
1757   case TargetOpcode::G_CONSTANT: {
1758     bool IsZero = false;
1759     if (I.getOperand(1).isCImm())
1760       IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
1761     else if (I.getOperand(1).isImm())
1762       IsZero = I.getOperand(1).getImm() == 0;
1763 
1764     if (!IsZero)
1765       return false;
1766 
1767     Register DefReg = I.getOperand(0).getReg();
1768     LLT Ty = MRI.getType(DefReg);
1769     if (Ty.getSizeInBits() == 64) {
1770       I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
1771       RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
1772     } else if (Ty.getSizeInBits() == 32) {
1773       I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
1774       RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
1775     } else
1776       return false;
1777 
1778     I.setDesc(TII.get(TargetOpcode::COPY));
1779     return true;
1780   }
1781   default:
1782     return false;
1783   }
1784 }
1785 
1786 bool AArch64InstructionSelector::select(MachineInstr &I) {
1787   assert(I.getParent() && "Instruction should be in a basic block!");
1788   assert(I.getParent()->getParent() && "Instruction should be in a function!");
1789 
1790   MachineBasicBlock &MBB = *I.getParent();
1791   MachineFunction &MF = *MBB.getParent();
1792   MachineRegisterInfo &MRI = MF.getRegInfo();
1793 
1794   const AArch64Subtarget *Subtarget =
1795       &static_cast<const AArch64Subtarget &>(MF.getSubtarget());
1796   if (Subtarget->requiresStrictAlign()) {
1797     // We don't support this feature yet.
1798     LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
1799     return false;
1800   }
1801 
1802   unsigned Opcode = I.getOpcode();
1803   // G_PHI requires same handling as PHI
1804   if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
1805     // Certain non-generic instructions also need some special handling.
1806 
1807     if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
1808       return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1809 
1810     if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
1811       const Register DefReg = I.getOperand(0).getReg();
1812       const LLT DefTy = MRI.getType(DefReg);
1813 
1814       const RegClassOrRegBank &RegClassOrBank =
1815         MRI.getRegClassOrRegBank(DefReg);
1816 
1817       const TargetRegisterClass *DefRC
1818         = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
1819       if (!DefRC) {
1820         if (!DefTy.isValid()) {
1821           LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
1822           return false;
1823         }
1824         const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
1825         DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
1826         if (!DefRC) {
1827           LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
1828           return false;
1829         }
1830       }
1831 
1832       I.setDesc(TII.get(TargetOpcode::PHI));
1833 
1834       return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
1835     }
1836 
1837     if (I.isCopy())
1838       return selectCopy(I, TII, MRI, TRI, RBI);
1839 
1840     return true;
1841   }
1842 
1843 
1844   if (I.getNumOperands() != I.getNumExplicitOperands()) {
1845     LLVM_DEBUG(
1846         dbgs() << "Generic instruction has unexpected implicit operands\n");
1847     return false;
1848   }
1849 
1850   // Try to do some lowering before we start instruction selecting. These
1851   // lowerings are purely transformations on the input G_MIR and so selection
1852   // must continue after any modification of the instruction.
1853   if (preISelLower(I)) {
1854     Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
1855   }
1856 
1857   // There may be patterns that the importer can't handle optimally but still
1858   // selects into a suboptimal sequence, so our custom C++ selection code
1859   // later never gets a chance to work on them. Therefore, we have an early
1860   // selection attempt here to give priority to certain selection routines
1861   // over the imported ones.
1862   if (earlySelect(I))
1863     return true;
1864 
1865   if (selectImpl(I, *CoverageInfo))
1866     return true;
1867 
1868   LLT Ty =
1869       I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
1870 
1871   MachineIRBuilder MIB(I);
1872 
1873   switch (Opcode) {
1874   case TargetOpcode::G_BRCOND: {
1875     if (Ty.getSizeInBits() > 32) {
1876       // We shouldn't need this on AArch64, but it would be implemented as an
1877       // EXTRACT_SUBREG followed by a TBNZW because TBNZX has no encoding if the
1878       // bit being tested is < 32.
1879       LLVM_DEBUG(dbgs() << "G_BRCOND has type: " << Ty
1880                         << ", expected at most 32-bits");
1881       return false;
1882     }
1883 
1884     const Register CondReg = I.getOperand(0).getReg();
1885     MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1886 
1887     // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1888     // instructions will not be produced, as they are conditional branch
1889     // instructions that do not set flags.
1890     if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))
1891       return true;
1892 
1893     if (ProduceNonFlagSettingCondBr) {
1894       auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
1895                      .addUse(CondReg)
1896                      .addImm(/*bit offset=*/0)
1897                      .addMBB(DestMBB);
1898 
1899       I.eraseFromParent();
1900       return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
1901     } else {
1902       auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
1903                      .addDef(AArch64::WZR)
1904                      .addUse(CondReg)
1905                      .addImm(1);
1906       constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
1907       auto Bcc =
1908           BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
1909               .addImm(AArch64CC::EQ)
1910               .addMBB(DestMBB);
1911 
1912       I.eraseFromParent();
1913       return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
1914     }
1915   }
1916 
1917   case TargetOpcode::G_BRINDIRECT: {
1918     I.setDesc(TII.get(AArch64::BR));
1919     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1920   }
1921 
1922   case TargetOpcode::G_BRJT:
1923     return selectBrJT(I, MRI);
1924 
1925   case AArch64::G_ADD_LOW: {
1926     // This op may have been separated from its ADRP companion by the localizer
1927     // or some other code motion pass. Given that many CPUs will try to
1928     // macro fuse these operations anyway, select this into a MOVaddr pseudo
1929     // which will later be expanded into an ADRP+ADD pair after scheduling.
1930     MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
1931     if (BaseMI->getOpcode() != AArch64::ADRP) {
1932       I.setDesc(TII.get(AArch64::ADDXri));
1933       I.addOperand(MachineOperand::CreateImm(0));
1934       return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1935     }
1936     assert(TM.getCodeModel() == CodeModel::Small &&
1937            "Expected small code model");
1938     MachineIRBuilder MIB(I);
1939     auto Op1 = BaseMI->getOperand(1);
1940     auto Op2 = I.getOperand(2);
1941     auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
1942                        .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
1943                                          Op1.getTargetFlags())
1944                        .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
1945                                          Op2.getTargetFlags());
1946     I.eraseFromParent();
1947     return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
1948   }
1949 
1950   case TargetOpcode::G_BSWAP: {
1951     // Handle vector types for G_BSWAP directly.
1952     Register DstReg = I.getOperand(0).getReg();
1953     LLT DstTy = MRI.getType(DstReg);
1954 
1955     // We should only get vector types here; everything else is handled by the
1956     // importer right now.
1957     if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
1958       LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
1959       return false;
1960     }
1961 
1962     // Only handle 4 and 2 element vectors for now.
1963     // TODO: 16-bit elements.
1964     unsigned NumElts = DstTy.getNumElements();
1965     if (NumElts != 4 && NumElts != 2) {
1966       LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
1967       return false;
1968     }
1969 
1970     // Choose the correct opcode for the supported types. Right now, that's
1971     // v2s32, v4s32, and v2s64.
1972     unsigned Opc = 0;
1973     unsigned EltSize = DstTy.getElementType().getSizeInBits();
1974     if (EltSize == 32)
1975       Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
1976                                           : AArch64::REV32v16i8;
1977     else if (EltSize == 64)
1978       Opc = AArch64::REV64v16i8;
1979 
1980     // We should always get something by the time we get here...
1981     assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
1982 
1983     I.setDesc(TII.get(Opc));
1984     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1985   }
1986 
1987   case TargetOpcode::G_FCONSTANT:
1988   case TargetOpcode::G_CONSTANT: {
1989     const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
1990 
1991     const LLT s8 = LLT::scalar(8);
1992     const LLT s16 = LLT::scalar(16);
1993     const LLT s32 = LLT::scalar(32);
1994     const LLT s64 = LLT::scalar(64);
1995     const LLT p0 = LLT::pointer(0, 64);
1996 
1997     const Register DefReg = I.getOperand(0).getReg();
1998     const LLT DefTy = MRI.getType(DefReg);
1999     const unsigned DefSize = DefTy.getSizeInBits();
2000     const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2001 
2002     // FIXME: Redundant check, but even less readable when factored out.
2003     if (isFP) {
2004       if (Ty != s32 && Ty != s64) {
2005         LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2006                           << " constant, expected: " << s32 << " or " << s64
2007                           << '\n');
2008         return false;
2009       }
2010 
2011       if (RB.getID() != AArch64::FPRRegBankID) {
2012         LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2013                           << " constant on bank: " << RB
2014                           << ", expected: FPR\n");
2015         return false;
2016       }
2017 
2018       // The case when we have 0.0 is covered by tablegen. Reject it here so we
2019       // can be sure tablegen works correctly and isn't rescued by this code.
2020       if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0))
2021         return false;
2022     } else {
2023       // s32 and s64 are covered by tablegen.
2024       if (Ty != p0 && Ty != s8 && Ty != s16) {
2025         LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2026                           << " constant, expected: " << s32 << ", " << s64
2027                           << ", or " << p0 << '\n');
2028         return false;
2029       }
2030 
2031       if (RB.getID() != AArch64::GPRRegBankID) {
2032         LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2033                           << " constant on bank: " << RB
2034                           << ", expected: GPR\n");
2035         return false;
2036       }
2037     }
2038 
2039     // We allow G_CONSTANT of types < 32b.
2040     const unsigned MovOpc =
2041         DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2042 
2043     if (isFP) {
2044       // Either emit a FMOV, or emit a copy to emit a normal mov.
2045       const TargetRegisterClass &GPRRC =
2046           DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
2047       const TargetRegisterClass &FPRRC =
2048           DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass;
2049 
2050       // Can we use a FMOV instruction to represent the immediate?
2051       if (emitFMovForFConstant(I, MRI))
2052         return true;
2053 
2054       // For 64b values, emit a constant pool load instead.
2055       if (DefSize == 64) {
2056         auto *FPImm = I.getOperand(1).getFPImm();
2057         MachineIRBuilder MIB(I);
2058         auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2059         if (!LoadMI) {
2060           LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2061           return false;
2062         }
2063         MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2064         I.eraseFromParent();
2065         return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2066       }
2067 
2068       // Nope. Emit a copy and use a normal mov instead.
2069       const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC);
2070       MachineOperand &RegOp = I.getOperand(0);
2071       RegOp.setReg(DefGPRReg);
2072       MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2073       MIB.buildCopy({DefReg}, {DefGPRReg});
2074 
2075       if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2076         LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2077         return false;
2078       }
2079 
2080       MachineOperand &ImmOp = I.getOperand(1);
2081       // FIXME: Is going through int64_t always correct?
2082       ImmOp.ChangeToImmediate(
2083           ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2084     } else if (I.getOperand(1).isCImm()) {
2085       uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2086       I.getOperand(1).ChangeToImmediate(Val);
2087     } else if (I.getOperand(1).isImm()) {
2088       uint64_t Val = I.getOperand(1).getImm();
2089       I.getOperand(1).ChangeToImmediate(Val);
2090     }
2091 
2092     I.setDesc(TII.get(MovOpc));
2093     constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2094     return true;
2095   }
2096   case TargetOpcode::G_EXTRACT: {
2097     Register DstReg = I.getOperand(0).getReg();
2098     Register SrcReg = I.getOperand(1).getReg();
2099     LLT SrcTy = MRI.getType(SrcReg);
2100     LLT DstTy = MRI.getType(DstReg);
2101     (void)DstTy;
2102     unsigned SrcSize = SrcTy.getSizeInBits();
2103 
2104     if (SrcTy.getSizeInBits() > 64) {
2105       // This should be an extract of an s128, which is like a vector extract.
2106       if (SrcTy.getSizeInBits() != 128)
2107         return false;
2108       // Only support extracting 64 bits from an s128 at the moment.
2109       if (DstTy.getSizeInBits() != 64)
2110         return false;
2111 
2112       const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2113       const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2114       // Check we have the right regbank always.
2115       assert(SrcRB.getID() == AArch64::FPRRegBankID &&
2116              DstRB.getID() == AArch64::FPRRegBankID &&
2117              "Wrong extract regbank!");
2118       (void)SrcRB;
2119 
2120       // Emit the same code as a vector extract.
2121       // Offset must be a multiple of 64.
2122       unsigned Offset = I.getOperand(2).getImm();
2123       if (Offset % 64 != 0)
2124         return false;
2125       unsigned LaneIdx = Offset / 64;
2126       MachineIRBuilder MIB(I);
2127       MachineInstr *Extract = emitExtractVectorElt(
2128           DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2129       if (!Extract)
2130         return false;
2131       I.eraseFromParent();
2132       return true;
2133     }
2134 
2135     I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2136     MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2137                                       Ty.getSizeInBits() - 1);
2138 
2139     if (SrcSize < 64) {
2140       assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2141              "unexpected G_EXTRACT types");
2142       return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2143     }
2144 
2145     DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2146     MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2147     MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2148         .addReg(DstReg, 0, AArch64::sub_32);
2149     RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2150                                  AArch64::GPR32RegClass, MRI);
2151     I.getOperand(0).setReg(DstReg);
2152 
2153     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2154   }
2155 
2156   case TargetOpcode::G_INSERT: {
2157     LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2158     LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2159     unsigned DstSize = DstTy.getSizeInBits();
2160     // Larger inserts are vectors, same-size ones should be something else by
2161     // now (split up or turned into COPYs).
2162     if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2163       return false;
2164 
2165     I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2166     unsigned LSB = I.getOperand(3).getImm();
2167     unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2168     I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2169     MachineInstrBuilder(MF, I).addImm(Width - 1);
2170 
2171     if (DstSize < 64) {
2172       assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2173              "unexpected G_INSERT types");
2174       return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2175     }
2176 
2177     Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2178     BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2179             TII.get(AArch64::SUBREG_TO_REG))
2180         .addDef(SrcReg)
2181         .addImm(0)
2182         .addUse(I.getOperand(2).getReg())
2183         .addImm(AArch64::sub_32);
2184     RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2185                                  AArch64::GPR32RegClass, MRI);
2186     I.getOperand(2).setReg(SrcReg);
2187 
2188     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2189   }
2190   case TargetOpcode::G_FRAME_INDEX: {
2191     // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2192     if (Ty != LLT::pointer(0, 64)) {
2193       LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2194                         << ", expected: " << LLT::pointer(0, 64) << '\n');
2195       return false;
2196     }
2197     I.setDesc(TII.get(AArch64::ADDXri));
2198 
2199     // MOs for a #0 shifted immediate.
2200     I.addOperand(MachineOperand::CreateImm(0));
2201     I.addOperand(MachineOperand::CreateImm(0));
2202 
2203     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2204   }
2205 
2206   case TargetOpcode::G_GLOBAL_VALUE: {
2207     auto GV = I.getOperand(1).getGlobal();
2208     if (GV->isThreadLocal())
2209       return selectTLSGlobalValue(I, MRI);
2210 
2211     unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
2212     if (OpFlags & AArch64II::MO_GOT) {
2213       I.setDesc(TII.get(AArch64::LOADgot));
2214       I.getOperand(1).setTargetFlags(OpFlags);
2215     } else if (TM.getCodeModel() == CodeModel::Large) {
2216       // Materialize the global using movz/movk instructions.
2217       materializeLargeCMVal(I, GV, OpFlags);
2218       I.eraseFromParent();
2219       return true;
2220     } else if (TM.getCodeModel() == CodeModel::Tiny) {
2221       I.setDesc(TII.get(AArch64::ADR));
2222       I.getOperand(1).setTargetFlags(OpFlags);
2223     } else {
2224       I.setDesc(TII.get(AArch64::MOVaddr));
2225       I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2226       MachineInstrBuilder MIB(MF, I);
2227       MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2228                            OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2229     }
2230     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2231   }
2232 
2233   case TargetOpcode::G_ZEXTLOAD:
2234   case TargetOpcode::G_LOAD:
2235   case TargetOpcode::G_STORE: {
2236     bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2237     MachineIRBuilder MIB(I);
2238 
2239     LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
2240 
2241     if (PtrTy != LLT::pointer(0, 64)) {
2242       LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
2243                         << ", expected: " << LLT::pointer(0, 64) << '\n');
2244       return false;
2245     }
2246 
2247     auto &MemOp = **I.memoperands_begin();
2248     if (MemOp.isAtomic()) {
2249       // For now we just support s8 acquire loads to be able to compile stack
2250       // protector code.
2251       if (MemOp.getOrdering() == AtomicOrdering::Acquire &&
2252           MemOp.getSize() == 1) {
2253         I.setDesc(TII.get(AArch64::LDARB));
2254         return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2255       }
2256       LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n");
2257       return false;
2258     }
2259     unsigned MemSizeInBits = MemOp.getSize() * 8;
2260 
2261     const Register PtrReg = I.getOperand(1).getReg();
2262 #ifndef NDEBUG
2263     const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2264     // Sanity-check the pointer register.
2265     assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2266            "Load/Store pointer operand isn't a GPR");
2267     assert(MRI.getType(PtrReg).isPointer() &&
2268            "Load/Store pointer operand isn't a pointer");
2269 #endif
2270 
2271     const Register ValReg = I.getOperand(0).getReg();
2272     const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2273 
2274     const unsigned NewOpc =
2275         selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
2276     if (NewOpc == I.getOpcode())
2277       return false;
2278 
2279     I.setDesc(TII.get(NewOpc));
2280 
2281     uint64_t Offset = 0;
2282     auto *PtrMI = MRI.getVRegDef(PtrReg);
2283 
2284     // Try to fold a GEP into our unsigned immediate addressing mode.
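    // For example, a 4-byte load from %base + 8 becomes LDRWui %base, 2; the
    // folded immediate is scaled by the access size.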
2285     if (PtrMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
2286       if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) {
2287         int64_t Imm = *COff;
2288         const unsigned Size = MemSizeInBits / 8;
2289         const unsigned Scale = Log2_32(Size);
2290         if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) {
2291           Register Ptr2Reg = PtrMI->getOperand(1).getReg();
2292           I.getOperand(1).setReg(Ptr2Reg);
2293           PtrMI = MRI.getVRegDef(Ptr2Reg);
2294           Offset = Imm / Size;
2295         }
2296       }
2297     }
2298 
2299     // If we haven't folded anything into our addressing mode yet, try to fold
2300     // a frame index into the base+offset.
2301     if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX)
2302       I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex());
2303 
2304     I.addOperand(MachineOperand::CreateImm(Offset));
2305 
2306     // If we're storing a 0, use WZR/XZR.
2307     if (auto CVal = getConstantVRegVal(ValReg, MRI)) {
2308       if (*CVal == 0 && Opcode == TargetOpcode::G_STORE) {
2309         if (I.getOpcode() == AArch64::STRWui)
2310           I.getOperand(0).setReg(AArch64::WZR);
2311         else if (I.getOpcode() == AArch64::STRXui)
2312           I.getOperand(0).setReg(AArch64::XZR);
2313       }
2314     }
2315 
2316     if (IsZExtLoad) {
2317       // The zextload from a smaller type to i32 should be handled by the importer.
2318       if (MRI.getType(ValReg).getSizeInBits() != 64)
2319         return false;
2320       // If we have a ZEXTLOAD then change the load's type to be a narrower reg
2321       // and zero-extend with SUBREG_TO_REG.
2322       Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2323       Register DstReg = I.getOperand(0).getReg();
2324       I.getOperand(0).setReg(LdReg);
2325 
2326       MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2327       MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
2328           .addImm(0)
2329           .addUse(LdReg)
2330           .addImm(AArch64::sub_32);
2331       constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2332       return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
2333                                           MRI);
2334     }
2335     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2336   }
2337 
2338   case TargetOpcode::G_SMULH:
2339   case TargetOpcode::G_UMULH: {
2340     // Reject the various things we don't support yet.
2341     if (unsupportedBinOp(I, RBI, MRI, TRI))
2342       return false;
2343 
2344     const Register DefReg = I.getOperand(0).getReg();
2345     const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2346 
2347     if (RB.getID() != AArch64::GPRRegBankID) {
2348       LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
2349       return false;
2350     }
2351 
2352     if (Ty != LLT::scalar(64)) {
2353       LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
2354                         << ", expected: " << LLT::scalar(64) << '\n');
2355       return false;
2356     }
2357 
2358     unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
2359                                                              : AArch64::UMULHrr;
2360     I.setDesc(TII.get(NewOpc));
2361 
2362     // Now that we selected an opcode, we need to constrain the register
2363     // operands to use appropriate classes.
2364     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2365   }
2366   case TargetOpcode::G_FADD:
2367   case TargetOpcode::G_FSUB:
2368   case TargetOpcode::G_FMUL:
2369   case TargetOpcode::G_FDIV:
2370 
2371   case TargetOpcode::G_ASHR:
2372     if (MRI.getType(I.getOperand(0).getReg()).isVector())
2373       return selectVectorASHR(I, MRI);
2374     LLVM_FALLTHROUGH;
2375   case TargetOpcode::G_SHL:
2376     if (Opcode == TargetOpcode::G_SHL &&
2377         MRI.getType(I.getOperand(0).getReg()).isVector())
2378       return selectVectorSHL(I, MRI);
2379     LLVM_FALLTHROUGH;
2380   case TargetOpcode::G_OR:
2381   case TargetOpcode::G_LSHR: {
2382     // Reject the various things we don't support yet.
2383     if (unsupportedBinOp(I, RBI, MRI, TRI))
2384       return false;
2385 
2386     const unsigned OpSize = Ty.getSizeInBits();
2387 
2388     const Register DefReg = I.getOperand(0).getReg();
2389     const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2390 
2391     const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
2392     if (NewOpc == I.getOpcode())
2393       return false;
2394 
2395     I.setDesc(TII.get(NewOpc));
2396     // FIXME: Should the type be always reset in setDesc?
2397 
2398     // Now that we selected an opcode, we need to constrain the register
2399     // operands to use appropriate classes.
2400     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2401   }
2402 
2403   case TargetOpcode::G_PTR_ADD: {
2404     MachineIRBuilder MIRBuilder(I);
2405     emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2),
2406             MIRBuilder);
2407     I.eraseFromParent();
2408     return true;
2409   }
2410   case TargetOpcode::G_UADDO: {
2411     // TODO: Support other types.
2412     unsigned OpSize = Ty.getSizeInBits();
2413     if (OpSize != 32 && OpSize != 64) {
2414       LLVM_DEBUG(
2415           dbgs()
2416           << "G_UADDO currently only supported for 32 and 64 b types.\n");
2417       return false;
2418     }
2419 
2420     // TODO: Support vectors.
2421     if (Ty.isVector()) {
2422       LLVM_DEBUG(dbgs() << "G_UADDO currently only supported for scalars.\n");
2423       return false;
2424     }
2425 
2426     // Add and set the set condition flag.
2427     unsigned AddsOpc = OpSize == 32 ? AArch64::ADDSWrr : AArch64::ADDSXrr;
2428     MachineIRBuilder MIRBuilder(I);
2429     auto AddsMI = MIRBuilder.buildInstr(AddsOpc, {I.getOperand(0)},
2430                                         {I.getOperand(2), I.getOperand(3)});
2431     constrainSelectedInstRegOperands(*AddsMI, TII, TRI, RBI);
2432 
2433     // Now, put the overflow result in the register given by the first operand
2434     // to the G_UADDO. CSINC increments the result when the predicate is false,
2435     // so to get the increment when it's true, we need to use the inverse. In
2436     // this case, we want to increment when carry is set.
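    // In effect this is a "cset w<overflow>, hs": set the overflow result to
    // 1 when the carry flag (HS) is set.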
2437     auto CsetMI = MIRBuilder
2438                       .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
2439                                   {Register(AArch64::WZR), Register(AArch64::WZR)})
2440                       .addImm(getInvertedCondCode(AArch64CC::HS));
2441     constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
2442     I.eraseFromParent();
2443     return true;
2444   }
2445 
2446   case TargetOpcode::G_PTRMASK: {
2447     Register MaskReg = I.getOperand(2).getReg();
2448     Optional<int64_t> MaskVal = getConstantVRegVal(MaskReg, MRI);
2449     // TODO: Implement arbitrary cases
2450     if (!MaskVal || !isShiftedMask_64(*MaskVal))
2451       return false;
2452 
2453     uint64_t Mask = *MaskVal;
2454     I.setDesc(TII.get(AArch64::ANDXri));
2455     I.getOperand(2).ChangeToImmediate(
2456         AArch64_AM::encodeLogicalImmediate(Mask, 64));
2457 
2458     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2459   }
2460   case TargetOpcode::G_PTRTOINT:
2461   case TargetOpcode::G_TRUNC: {
2462     const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2463     const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
2464 
2465     const Register DstReg = I.getOperand(0).getReg();
2466     const Register SrcReg = I.getOperand(1).getReg();
2467 
2468     const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2469     const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2470 
2471     if (DstRB.getID() != SrcRB.getID()) {
2472       LLVM_DEBUG(
2473           dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
2474       return false;
2475     }
2476 
2477     if (DstRB.getID() == AArch64::GPRRegBankID) {
2478       const TargetRegisterClass *DstRC =
2479           getRegClassForTypeOnBank(DstTy, DstRB, RBI);
2480       if (!DstRC)
2481         return false;
2482 
2483       const TargetRegisterClass *SrcRC =
2484           getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
2485       if (!SrcRC)
2486         return false;
2487 
2488       if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
2489           !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
2490         LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
2491         return false;
2492       }
2493 
2494       if (DstRC == SrcRC) {
2495         // Nothing to be done
2496       } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
2497                  SrcTy == LLT::scalar(64)) {
2498         llvm_unreachable("TableGen can import this case");
2499         return false;
2500       } else if (DstRC == &AArch64::GPR32RegClass &&
2501                  SrcRC == &AArch64::GPR64RegClass) {
2502         I.getOperand(1).setSubReg(AArch64::sub_32);
2503       } else {
2504         LLVM_DEBUG(
2505             dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
2506         return false;
2507       }
2508 
2509       I.setDesc(TII.get(TargetOpcode::COPY));
2510       return true;
2511     } else if (DstRB.getID() == AArch64::FPRRegBankID) {
2512       if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
2513         I.setDesc(TII.get(AArch64::XTNv4i16));
2514         constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2515         return true;
2516       }
2517 
2518       if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
2519         MachineIRBuilder MIB(I);
2520         MachineInstr *Extract = emitExtractVectorElt(
2521             DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
2522         if (!Extract)
2523           return false;
2524         I.eraseFromParent();
2525         return true;
2526       }
2527 
2528       // We might have a vector G_PTRTOINT, in which case just emit a COPY.
2529       if (Opcode == TargetOpcode::G_PTRTOINT) {
2530         assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
2531         I.setDesc(TII.get(TargetOpcode::COPY));
2532         return true;
2533       }
2534     }
2535 
2536     return false;
2537   }
2538 
2539   case TargetOpcode::G_ANYEXT: {
2540     const Register DstReg = I.getOperand(0).getReg();
2541     const Register SrcReg = I.getOperand(1).getReg();
2542 
2543     const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
2544     if (RBDst.getID() != AArch64::GPRRegBankID) {
2545       LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
2546                         << ", expected: GPR\n");
2547       return false;
2548     }
2549 
2550     const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
2551     if (RBSrc.getID() != AArch64::GPRRegBankID) {
2552       LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
2553                         << ", expected: GPR\n");
2554       return false;
2555     }
2556 
2557     const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
2558 
2559     if (DstSize == 0) {
2560       LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
2561       return false;
2562     }
2563 
2564     if (DstSize != 64 && DstSize > 32) {
2565       LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
2566                         << ", expected: 32 or 64\n");
2567       return false;
2568     }
2569     // At this point G_ANYEXT is just like a plain COPY, but we need to
2570     // explicitly form the 64-bit value when extending to 64 bits.
2571     if (DstSize > 32) {
2572       Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
2573       BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
2574           .addDef(ExtSrc)
2575           .addImm(0)
2576           .addUse(SrcReg)
2577           .addImm(AArch64::sub_32);
2578       I.getOperand(1).setReg(ExtSrc);
2579     }
2580     return selectCopy(I, TII, MRI, TRI, RBI);
2581   }
2582 
2583   case TargetOpcode::G_ZEXT:
2584   case TargetOpcode::G_SEXT_INREG:
2585   case TargetOpcode::G_SEXT: {
2586     unsigned Opcode = I.getOpcode();
2587     const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
2588     const Register DefReg = I.getOperand(0).getReg();
2589     Register SrcReg = I.getOperand(1).getReg();
2590     const LLT DstTy = MRI.getType(DefReg);
2591     const LLT SrcTy = MRI.getType(SrcReg);
2592     unsigned DstSize = DstTy.getSizeInBits();
2593     unsigned SrcSize = SrcTy.getSizeInBits();
2594 
2595     // SEXT_INREG has the same src reg size as dst, the size of the value to be
2596     // extended is encoded in the imm.
2597     if (Opcode == TargetOpcode::G_SEXT_INREG)
2598       SrcSize = I.getOperand(2).getImm();
2599 
2600     if (DstTy.isVector())
2601       return false; // Should be handled by imported patterns.
2602 
2603     assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
2604                AArch64::GPRRegBankID &&
2605            "Unexpected ext regbank");
2606 
2607     MachineIRBuilder MIB(I);
2608     MachineInstr *ExtI;
2609 
2610     // First, check if we're extending the result of a load whose destination
2611     // type is smaller than 32 bits; if so, this zext is redundant. GPR32 is
2612     // the smallest GPR register on AArch64 and all narrower loads
2613     // automatically zero-extend the upper bits. E.g.
2614     // %v(s8) = G_LOAD %p, :: (load 1)
2615     // %v2(s32) = G_ZEXT %v(s8)
2616     if (!IsSigned) {
2617       auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
2618       bool IsGPR =
2619           RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
2620       if (LoadMI && IsGPR) {
2621         const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
2622         unsigned BytesLoaded = MemOp->getSize();
2623         if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
2624           return selectCopy(I, TII, MRI, TRI, RBI);
2625       }
2626 
2627       // If we are zero extending from 32 bits to 64 bits, it's possible that
2628       // the instruction implicitly does the zero extend for us. In that case,
2629       // we can just emit a SUBREG_TO_REG.
2630       if (IsGPR && SrcSize == 32 && DstSize == 64) {
2631         // Unlike with the G_LOAD case, we don't want to look through copies
2632         // here.
2633         MachineInstr *Def = MRI.getVRegDef(SrcReg);
2634         if (Def && isDef32(*Def)) {
2635           MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
2636               .addImm(0)
2637               .addUse(SrcReg)
2638               .addImm(AArch64::sub_32);
2639 
2640           if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
2641                                             MRI)) {
2642             LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
2643             return false;
2644           }
2645 
2646           if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
2647                                             MRI)) {
2648             LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
2649             return false;
2650           }
2651 
2652           I.eraseFromParent();
2653           return true;
2654         }
2655       }
2656     }
2657 
2658     if (DstSize == 64) {
2659       if (Opcode != TargetOpcode::G_SEXT_INREG) {
2660         // FIXME: Can we avoid manually doing this?
2661         if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
2662                                           MRI)) {
2663           LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
2664                             << " operand\n");
2665           return false;
2666         }
2667         SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
2668                                 {&AArch64::GPR64RegClass}, {})
2669                      .addImm(0)
2670                      .addUse(SrcReg)
2671                      .addImm(AArch64::sub_32)
2672                      .getReg(0);
2673       }
2674 
2675       ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
2676                              {DefReg}, {SrcReg})
2677                   .addImm(0)
2678                   .addImm(SrcSize - 1);
2679     } else if (DstSize <= 32) {
2680       ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
2681                              {DefReg}, {SrcReg})
2682                   .addImm(0)
2683                   .addImm(SrcSize - 1);
2684     } else {
2685       return false;
2686     }
2687 
2688     constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
2689     I.eraseFromParent();
2690     return true;
2691   }
2692 
2693   case TargetOpcode::G_SITOFP:
2694   case TargetOpcode::G_UITOFP:
2695   case TargetOpcode::G_FPTOSI:
2696   case TargetOpcode::G_FPTOUI: {
2697     const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
2698               SrcTy = MRI.getType(I.getOperand(1).getReg());
2699     const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
2700     if (NewOpc == Opcode)
2701       return false;
2702 
2703     I.setDesc(TII.get(NewOpc));
2704     constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2705 
2706     return true;
2707   }
2708 
2709   case TargetOpcode::G_FREEZE:
2710     return selectCopy(I, TII, MRI, TRI, RBI);
2711 
2712   case TargetOpcode::G_INTTOPTR:
2713     // The importer is currently unable to import pointer types since they
2714     // didn't exist in SelectionDAG.
2715     return selectCopy(I, TII, MRI, TRI, RBI);
2716 
2717   case TargetOpcode::G_BITCAST:
2718     // Imported SelectionDAG rules can handle every bitcast except those that
2719     // bitcast from a type to the same type. Ideally, these shouldn't occur
2720     // but we might not run an optimizer that deletes them. The other exception
2721     // is bitcasts involving pointer types, as SelectionDAG has no knowledge
2722     // of them.
2723     return selectCopy(I, TII, MRI, TRI, RBI);
2724 
2725   case TargetOpcode::G_SELECT: {
2726     if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
2727       LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
2728                         << ", expected: " << LLT::scalar(1) << '\n');
2729       return false;
2730     }
2731 
2732     const Register CondReg = I.getOperand(1).getReg();
2733     const Register TReg = I.getOperand(2).getReg();
2734     const Register FReg = I.getOperand(3).getReg();
2735 
2736     if (tryOptSelect(I))
2737       return true;
2738 
2739     Register CSelOpc = selectSelectOpc(I, MRI, RBI);
2740     MachineInstr &TstMI =
2741         *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
2742              .addDef(AArch64::WZR)
2743              .addUse(CondReg)
2744              .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2745 
2746     MachineInstr &CSelMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CSelOpc))
2747                                 .addDef(I.getOperand(0).getReg())
2748                                 .addUse(TReg)
2749                                 .addUse(FReg)
2750                                 .addImm(AArch64CC::NE);
2751 
2752     constrainSelectedInstRegOperands(TstMI, TII, TRI, RBI);
2753     constrainSelectedInstRegOperands(CSelMI, TII, TRI, RBI);
2754 
2755     I.eraseFromParent();
2756     return true;
2757   }
2758   case TargetOpcode::G_ICMP: {
2759     if (Ty.isVector())
2760       return selectVectorICmp(I, MRI);
2761 
2762     if (Ty != LLT::scalar(32)) {
2763       LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
2764                         << ", expected: " << LLT::scalar(32) << '\n');
2765       return false;
2766     }
2767 
2768     MachineIRBuilder MIRBuilder(I);
2769     MachineInstr *Cmp;
2770     CmpInst::Predicate Pred;
2771     std::tie(Cmp, Pred) = emitIntegerCompare(I.getOperand(2), I.getOperand(3),
2772                                              I.getOperand(1), MIRBuilder);
2773     if (!Cmp)
2774       return false;
2775     emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIRBuilder);
2776     I.eraseFromParent();
2777     return true;
2778   }
2779 
2780   case TargetOpcode::G_FCMP: {
2781     if (Ty != LLT::scalar(32)) {
2782       LLVM_DEBUG(dbgs() << "G_FCMP result has type: " << Ty
2783                         << ", expected: " << LLT::scalar(32) << '\n');
2784       return false;
2785     }
2786 
2787     unsigned CmpOpc = selectFCMPOpc(I, MRI);
2788     if (!CmpOpc)
2789       return false;
2790 
2791     // FIXME: regbank
2792 
2793     AArch64CC::CondCode CC1, CC2;
2794     changeFCMPPredToAArch64CC(
2795         (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);
2796 
2797     // Partially build the compare. Decide if we need to add a use for the
2798     // third operand based off whether or not we're comparing against 0.0.
2799     auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
2800                      .addUse(I.getOperand(2).getReg());
2801 
2802     // If we don't have an immediate compare, then we need to add a use of the
2803     // register which wasn't used for the immediate.
2804     // Note that the immediate will always be the last operand.
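    // For illustration (hypothetical operands): a compare against +0.0 selects
    // FCMPSri/FCMPDri with a single register use, while a register-register
    // compare selects FCMPSrr/FCMPDrr and needs the second register added here.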
2805     if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
2806       CmpMI = CmpMI.addUse(I.getOperand(3).getReg());
2807 
2808     const Register DefReg = I.getOperand(0).getReg();
2809     Register Def1Reg = DefReg;
2810     if (CC2 != AArch64CC::AL)
2811       Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2812 
2813     MachineInstr &CSetMI =
2814         *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
2815              .addDef(Def1Reg)
2816              .addUse(AArch64::WZR)
2817              .addUse(AArch64::WZR)
2818              .addImm(getInvertedCondCode(CC1));
2819 
2820     if (CC2 != AArch64CC::AL) {
2821       Register Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2822       MachineInstr &CSet2MI =
2823           *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
2824                .addDef(Def2Reg)
2825                .addUse(AArch64::WZR)
2826                .addUse(AArch64::WZR)
2827                .addImm(getInvertedCondCode(CC2));
2828       MachineInstr &OrMI =
2829           *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr))
2830                .addDef(DefReg)
2831                .addUse(Def1Reg)
2832                .addUse(Def2Reg);
2833       constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI);
2834       constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI);
2835     }
2836     constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
2837     constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
2838 
2839     I.eraseFromParent();
2840     return true;
2841   }
2842   case TargetOpcode::G_VASTART:
2843     return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
2844                                 : selectVaStartAAPCS(I, MF, MRI);
2845   case TargetOpcode::G_INTRINSIC:
2846     return selectIntrinsic(I, MRI);
2847   case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
2848     return selectIntrinsicWithSideEffects(I, MRI);
2849   case TargetOpcode::G_IMPLICIT_DEF: {
2850     I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
2851     const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2852     const Register DstReg = I.getOperand(0).getReg();
2853     const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2854     const TargetRegisterClass *DstRC =
2855         getRegClassForTypeOnBank(DstTy, DstRB, RBI);
2856     RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
2857     return true;
2858   }
2859   case TargetOpcode::G_BLOCK_ADDR: {
2860     if (TM.getCodeModel() == CodeModel::Large) {
2861       materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
2862       I.eraseFromParent();
2863       return true;
2864     } else {
2865       I.setDesc(TII.get(AArch64::MOVaddrBA));
2866       auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
2867                            I.getOperand(0).getReg())
2868                        .addBlockAddress(I.getOperand(1).getBlockAddress(),
2869                                         /* Offset */ 0, AArch64II::MO_PAGE)
2870                        .addBlockAddress(
2871                            I.getOperand(1).getBlockAddress(), /* Offset */ 0,
2872                            AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
2873       I.eraseFromParent();
2874       return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
2875     }
2876   }
2877   case TargetOpcode::G_INTRINSIC_TRUNC:
2878     return selectIntrinsicTrunc(I, MRI);
2879   case TargetOpcode::G_INTRINSIC_ROUND:
2880     return selectIntrinsicRound(I, MRI);
2881   case TargetOpcode::G_BUILD_VECTOR:
2882     return selectBuildVector(I, MRI);
2883   case TargetOpcode::G_MERGE_VALUES:
2884     return selectMergeValues(I, MRI);
2885   case TargetOpcode::G_UNMERGE_VALUES:
2886     return selectUnmergeValues(I, MRI);
2887   case TargetOpcode::G_SHUFFLE_VECTOR:
2888     return selectShuffleVector(I, MRI);
2889   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
2890     return selectExtractElt(I, MRI);
2891   case TargetOpcode::G_INSERT_VECTOR_ELT:
2892     return selectInsertElt(I, MRI);
2893   case TargetOpcode::G_CONCAT_VECTORS:
2894     return selectConcatVectors(I, MRI);
2895   case TargetOpcode::G_JUMP_TABLE:
2896     return selectJumpTable(I, MRI);
2897   }
2898 
2899   return false;
2900 }
2901 
2902 bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
2903                                             MachineRegisterInfo &MRI) const {
2904   assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
2905   Register JTAddr = I.getOperand(0).getReg();
2906   unsigned JTI = I.getOperand(1).getIndex();
2907   Register Index = I.getOperand(2).getReg();
2908   MachineIRBuilder MIB(I);
2909 
2910   Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2911   Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
2912   auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
2913                                       {TargetReg, ScratchReg}, {JTAddr, Index})
2914                            .addJumpTableIndex(JTI);
2915   // Build the indirect branch.
2916   MIB.buildInstr(AArch64::BR, {}, {TargetReg});
2917   I.eraseFromParent();
2918   return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
2919 }
2920 
2921 bool AArch64InstructionSelector::selectJumpTable(
2922     MachineInstr &I, MachineRegisterInfo &MRI) const {
2923   assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
2924   assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
2925 
2926   Register DstReg = I.getOperand(0).getReg();
2927   unsigned JTI = I.getOperand(1).getIndex();
2928   // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
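  // Roughly (illustrative assembly; the jump table label is hypothetical):
  //   adrp x8, .LJTI0_0
  //   add  x8, x8, :lo12:.LJTI0_0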
2929   MachineIRBuilder MIB(I);
2930   auto MovMI =
2931     MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
2932           .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
2933           .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
2934   I.eraseFromParent();
2935   return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
2936 }
2937 
2938 bool AArch64InstructionSelector::selectTLSGlobalValue(
2939     MachineInstr &I, MachineRegisterInfo &MRI) const {
2940   if (!STI.isTargetMachO())
2941     return false;
2942   MachineFunction &MF = *I.getParent()->getParent();
2943   MF.getFrameInfo().setAdjustsStack(true);
2944 
2945   const GlobalValue &GV = *I.getOperand(1).getGlobal();
2946   MachineIRBuilder MIB(I);
2947 
2948   MIB.buildInstr(AArch64::LOADgot, {AArch64::X0}, {})
2949       .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
2950 
2951   auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
2952                              {Register(AArch64::X0)})
2953                   .addImm(0);
2954 
2955   // TLS calls preserve all registers except those that absolutely must be
2956   // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
2957   // silly).
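  // For illustration, the selected sequence is roughly (Mach-O assembly; the
  // symbol name and scratch register are hypothetical):
  //   adrp x0, _var@TLVPPAGE
  //   ldr  x0, [x0, _var@TLVPPAGEOFF]
  //   ldr  x8, [x0]
  //   blr  x8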
2958   MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
2959       .addDef(AArch64::X0, RegState::Implicit)
2960       .addRegMask(TRI.getTLSCallPreservedMask());
2961 
2962   MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
2963   RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
2964                                MRI);
2965   I.eraseFromParent();
2966   return true;
2967 }
2968 
2969 bool AArch64InstructionSelector::selectIntrinsicTrunc(
2970     MachineInstr &I, MachineRegisterInfo &MRI) const {
2971   const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
2972 
2973   // Select the correct opcode.
2974   unsigned Opc = 0;
2975   if (!SrcTy.isVector()) {
2976     switch (SrcTy.getSizeInBits()) {
2977     default:
2978     case 16:
2979       Opc = AArch64::FRINTZHr;
2980       break;
2981     case 32:
2982       Opc = AArch64::FRINTZSr;
2983       break;
2984     case 64:
2985       Opc = AArch64::FRINTZDr;
2986       break;
2987     }
2988   } else {
2989     unsigned NumElts = SrcTy.getNumElements();
2990     switch (SrcTy.getElementType().getSizeInBits()) {
2991     default:
2992       break;
2993     case 16:
2994       if (NumElts == 4)
2995         Opc = AArch64::FRINTZv4f16;
2996       else if (NumElts == 8)
2997         Opc = AArch64::FRINTZv8f16;
2998       break;
2999     case 32:
3000       if (NumElts == 2)
3001         Opc = AArch64::FRINTZv2f32;
3002       else if (NumElts == 4)
3003         Opc = AArch64::FRINTZv4f32;
3004       break;
3005     case 64:
3006       if (NumElts == 2)
3007         Opc = AArch64::FRINTZv2f64;
3008       break;
3009     }
3010   }
3011 
3012   if (!Opc) {
3013     // Didn't get an opcode above, bail.
3014     LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
3015     return false;
3016   }
3017 
3018   // Legalization would have set us up perfectly for this; we just need to
3019   // set the opcode and move on.
3020   I.setDesc(TII.get(Opc));
3021   return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3022 }
3023 
3024 bool AArch64InstructionSelector::selectIntrinsicRound(
3025     MachineInstr &I, MachineRegisterInfo &MRI) const {
3026   const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3027 
3028   // Select the correct opcode.
3029   unsigned Opc = 0;
3030   if (!SrcTy.isVector()) {
3031     switch (SrcTy.getSizeInBits()) {
3032     default:
3033     case 16:
3034       Opc = AArch64::FRINTAHr;
3035       break;
3036     case 32:
3037       Opc = AArch64::FRINTASr;
3038       break;
3039     case 64:
3040       Opc = AArch64::FRINTADr;
3041       break;
3042     }
3043   } else {
3044     unsigned NumElts = SrcTy.getNumElements();
3045     switch (SrcTy.getElementType().getSizeInBits()) {
3046     default:
3047       break;
3048     case 16:
3049       if (NumElts == 4)
3050         Opc = AArch64::FRINTAv4f16;
3051       else if (NumElts == 8)
3052         Opc = AArch64::FRINTAv8f16;
3053       break;
3054     case 32:
3055       if (NumElts == 2)
3056         Opc = AArch64::FRINTAv2f32;
3057       else if (NumElts == 4)
3058         Opc = AArch64::FRINTAv4f32;
3059       break;
3060     case 64:
3061       if (NumElts == 2)
3062         Opc = AArch64::FRINTAv2f64;
3063       break;
3064     }
3065   }
3066 
3067   if (!Opc) {
3068     // Didn't get an opcode above, bail.
3069     LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
3070     return false;
3071   }
3072 
3073   // Legalization would have set us up perfectly for this; we just need to
3074   // set the opcode and move on.
3075   I.setDesc(TII.get(Opc));
3076   return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3077 }
3078 
3079 bool AArch64InstructionSelector::selectVectorICmp(
3080     MachineInstr &I, MachineRegisterInfo &MRI) const {
3081   Register DstReg = I.getOperand(0).getReg();
3082   LLT DstTy = MRI.getType(DstReg);
3083   Register SrcReg = I.getOperand(2).getReg();
3084   Register Src2Reg = I.getOperand(3).getReg();
3085   LLT SrcTy = MRI.getType(SrcReg);
3086 
3087   unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
3088   unsigned NumElts = DstTy.getNumElements();
3089 
3090   // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
3091   // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
3092   // Third index is cc opcode:
3093   // 0 == eq
3094   // 1 == ugt
3095   // 2 == uge
3096   // 3 == ult
3097   // 4 == ule
3098   // 5 == sgt
3099   // 6 == sge
3100   // 7 == slt
3101   // 8 == sle
3102   // ne is done by negating 'eq' result.
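  // For example (illustrative): a v4s32 signed-greater-than compare maps to
  // OpcTable[2][1][5], i.e. CMGTv4i32.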
3103 
3104   // The table below assumes that for some comparisons the operands will be
3105   // commuted.
3106   // ult op == commute + ugt op
3107   // ule op == commute + uge op
3108   // slt op == commute + sgt op
3109   // sle op == commute + sge op
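  // For example (illustrative): "icmp ult %a, %b" is selected as the ugt
  // opcode with the operands swapped, i.e. CMHI %b, %a.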
3110   unsigned PredIdx = 0;
3111   bool SwapOperands = false;
3112   CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
3113   switch (Pred) {
3114   case CmpInst::ICMP_NE:
3115   case CmpInst::ICMP_EQ:
3116     PredIdx = 0;
3117     break;
3118   case CmpInst::ICMP_UGT:
3119     PredIdx = 1;
3120     break;
3121   case CmpInst::ICMP_UGE:
3122     PredIdx = 2;
3123     break;
3124   case CmpInst::ICMP_ULT:
3125     PredIdx = 3;
3126     SwapOperands = true;
3127     break;
3128   case CmpInst::ICMP_ULE:
3129     PredIdx = 4;
3130     SwapOperands = true;
3131     break;
3132   case CmpInst::ICMP_SGT:
3133     PredIdx = 5;
3134     break;
3135   case CmpInst::ICMP_SGE:
3136     PredIdx = 6;
3137     break;
3138   case CmpInst::ICMP_SLT:
3139     PredIdx = 7;
3140     SwapOperands = true;
3141     break;
3142   case CmpInst::ICMP_SLE:
3143     PredIdx = 8;
3144     SwapOperands = true;
3145     break;
3146   default:
3147     llvm_unreachable("Unhandled icmp predicate");
3148     return false;
3149   }
3150 
3151   // This table obviously should be tablegen'd when we have our GISel native
3152   // tablegen selector.
3153 
3154   static const unsigned OpcTable[4][4][9] = {
3155       {
3156           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3157            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3158            0 /* invalid */},
3159           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3160            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3161            0 /* invalid */},
3162           {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
3163            AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
3164            AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
3165           {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
3166            AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
3167            AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
3168       },
3169       {
3170           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3171            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3172            0 /* invalid */},
3173           {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
3174            AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
3175            AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
3176           {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
3177            AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
3178            AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
3179           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3180            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3181            0 /* invalid */}
3182       },
3183       {
3184           {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
3185            AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
3186            AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
3187           {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
3188            AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
3189            AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
3190           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3191            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3192            0 /* invalid */},
3193           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3194            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3195            0 /* invalid */}
3196       },
3197       {
3198           {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
3199            AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
3200            AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
3201           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3202            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3203            0 /* invalid */},
3204           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3205            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3206            0 /* invalid */},
3207           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3208            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3209            0 /* invalid */}
3210       },
3211   };
3212   unsigned EltIdx = Log2_32(SrcEltSize / 8);
3213   unsigned NumEltsIdx = Log2_32(NumElts / 2);
3214   unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
3215   if (!Opc) {
3216     LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
3217     return false;
3218   }
3219 
3220   const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3221   const TargetRegisterClass *SrcRC =
3222       getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
3223   if (!SrcRC) {
3224     LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3225     return false;
3226   }
3227 
3228   unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
3229   if (SrcTy.getSizeInBits() == 128)
3230     NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
3231 
3232   if (SwapOperands)
3233     std::swap(SrcReg, Src2Reg);
3234 
3235   MachineIRBuilder MIB(I);
3236   auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
3237   constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3238 
3239   // Invert if we had a 'ne' cc.
3240   if (NotOpc) {
3241     Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
3242     constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3243   } else {
3244     MIB.buildCopy(DstReg, Cmp.getReg(0));
3245   }
3246   RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
3247   I.eraseFromParent();
3248   return true;
3249 }
3250 
3251 MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3252     unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3253     MachineIRBuilder &MIRBuilder) const {
3254   auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3255 
3256   auto BuildFn = [&](unsigned SubregIndex) {
3257     auto Ins =
3258         MIRBuilder
3259             .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3260             .addImm(SubregIndex);
3261     constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3262     constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3263     return &*Ins;
3264   };
3265 
3266   switch (EltSize) {
3267   case 16:
3268     return BuildFn(AArch64::hsub);
3269   case 32:
3270     return BuildFn(AArch64::ssub);
3271   case 64:
3272     return BuildFn(AArch64::dsub);
3273   default:
3274     return nullptr;
3275   }
3276 }
3277 
3278 bool AArch64InstructionSelector::selectMergeValues(
3279     MachineInstr &I, MachineRegisterInfo &MRI) const {
3280   assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3281   const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3282   const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3283   assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3284   const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3285 
3286   if (I.getNumOperands() != 3)
3287     return false;
3288 
3289   // Merging 2 s64s into an s128.
3290   if (DstTy == LLT::scalar(128)) {
3291     if (SrcTy.getSizeInBits() != 64)
3292       return false;
3293     MachineIRBuilder MIB(I);
3294     Register DstReg = I.getOperand(0).getReg();
3295     Register Src1Reg = I.getOperand(1).getReg();
3296     Register Src2Reg = I.getOperand(2).getReg();
3297     auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3298     MachineInstr *InsMI =
3299         emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
3300     if (!InsMI)
3301       return false;
3302     MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3303                                           Src2Reg, /* LaneIdx */ 1, RB, MIB);
3304     if (!Ins2MI)
3305       return false;
3306     constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3307     constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
3308     I.eraseFromParent();
3309     return true;
3310   }
3311 
3312   if (RB.getID() != AArch64::GPRRegBankID)
3313     return false;
3314 
3315   if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3316     return false;
3317 
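  // What follows merges two s32 GPRs into an s64 (illustrative MIR; %lo and
  // %hi are hypothetical):
  //   %t0:gpr64 = SUBREG_TO_REG 0, %lo, %subreg.sub_32
  //   %t1:gpr64 = SUBREG_TO_REG 0, %hi, %subreg.sub_32
  //   %dst:gpr64 = BFMXri %t0, %t1, 32, 31   ; place %hi in bits [63:32]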
3318   auto *DstRC = &AArch64::GPR64RegClass;
3319   Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3320   MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3321                                     TII.get(TargetOpcode::SUBREG_TO_REG))
3322                                 .addDef(SubToRegDef)
3323                                 .addImm(0)
3324                                 .addUse(I.getOperand(1).getReg())
3325                                 .addImm(AArch64::sub_32);
3326   Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
3327   // Need to anyext the second scalar before we can use bfm
3328   MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3329                                     TII.get(TargetOpcode::SUBREG_TO_REG))
3330                                 .addDef(SubToRegDef2)
3331                                 .addImm(0)
3332                                 .addUse(I.getOperand(2).getReg())
3333                                 .addImm(AArch64::sub_32);
3334   MachineInstr &BFM =
3335       *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3336            .addDef(I.getOperand(0).getReg())
3337            .addUse(SubToRegDef)
3338            .addUse(SubToRegDef2)
3339            .addImm(32)
3340            .addImm(31);
3341   constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3342   constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3343   constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
3344   I.eraseFromParent();
3345   return true;
3346 }
3347 
3348 static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3349                               const unsigned EltSize) {
3350   // Choose a lane copy opcode and subregister based off of the size of the
3351   // vector's elements.
3352   switch (EltSize) {
3353   case 16:
3354     CopyOpc = AArch64::CPYi16;
3355     ExtractSubReg = AArch64::hsub;
3356     break;
3357   case 32:
3358     CopyOpc = AArch64::CPYi32;
3359     ExtractSubReg = AArch64::ssub;
3360     break;
3361   case 64:
3362     CopyOpc = AArch64::CPYi64;
3363     ExtractSubReg = AArch64::dsub;
3364     break;
3365   default:
3366     // Unknown size, bail out.
3367     LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
3368     return false;
3369   }
3370   return true;
3371 }
3372 
3373 MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3374     Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
3375     Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3376   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3377   unsigned CopyOpc = 0;
3378   unsigned ExtractSubReg = 0;
3379   if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
3380     LLVM_DEBUG(
3381         dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
3382     return nullptr;
3383   }
3384 
3385   const TargetRegisterClass *DstRC =
3386       getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
3387   if (!DstRC) {
3388     LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
3389     return nullptr;
3390   }
3391 
3392   const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
3393   const LLT &VecTy = MRI.getType(VecReg);
3394   const TargetRegisterClass *VecRC =
3395       getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
3396   if (!VecRC) {
3397     LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3398     return nullptr;
3399   }
3400 
3401   // The register that we're going to copy into.
3402   Register InsertReg = VecReg;
3403   if (!DstReg)
3404     DstReg = MRI.createVirtualRegister(DstRC);
3405   // If the lane index is 0, we just use a subregister COPY.
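  // E.g. (illustrative) extracting element 0 of a v4s32 becomes
  //   %dst:fpr32 = COPY %vec.ssub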
3406   if (LaneIdx == 0) {
3407     auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
3408                     .addReg(VecReg, 0, ExtractSubReg);
3409     RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3410     return &*Copy;
3411   }
3412 
3413   // Lane copies require 128-bit wide registers. If we're dealing with an
3414   // unpacked vector, then we need to move up to that width. Insert an implicit
3415   // def and a subregister insert to get us there.
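  // For illustration (hypothetical registers), extracting lane 1 of a 64-bit
  // v2s32 vector %v goes through:
  //   %undef:fpr128 = IMPLICIT_DEF
  //   %wide:fpr128 = INSERT_SUBREG %undef, %v, %subreg.dsub
  //   %dst:fpr32 = CPYi32 %wide, 1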
3416   if (VecTy.getSizeInBits() != 128) {
3417     MachineInstr *ScalarToVector = emitScalarToVector(
3418         VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
3419     if (!ScalarToVector)
3420       return nullptr;
3421     InsertReg = ScalarToVector->getOperand(0).getReg();
3422   }
3423 
3424   MachineInstr *LaneCopyMI =
3425       MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
3426   constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
3427 
3428   // Make sure that we actually constrain the initial copy.
3429   RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3430   return LaneCopyMI;
3431 }
3432 
3433 bool AArch64InstructionSelector::selectExtractElt(
3434     MachineInstr &I, MachineRegisterInfo &MRI) const {
3435   assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
3436          "unexpected opcode!");
3437   Register DstReg = I.getOperand(0).getReg();
3438   const LLT NarrowTy = MRI.getType(DstReg);
3439   const Register SrcReg = I.getOperand(1).getReg();
3440   const LLT WideTy = MRI.getType(SrcReg);
3441   (void)WideTy;
3442   assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
3443          "source register size too small!");
3444   assert(NarrowTy.isScalar() && "cannot extract vector into vector!");
3445 
3446   // Need the lane index to determine the correct copy opcode.
3447   MachineOperand &LaneIdxOp = I.getOperand(2);
3448   assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
3449 
3450   if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
3451     LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
3452     return false;
3453   }
3454 
3455   // Find the index to extract from.
3456   auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
3457   if (!VRegAndVal)
3458     return false;
3459   unsigned LaneIdx = VRegAndVal->Value;
3460 
3461   MachineIRBuilder MIRBuilder(I);
3462 
3463   const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3464   MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
3465                                                LaneIdx, MIRBuilder);
3466   if (!Extract)
3467     return false;
3468 
3469   I.eraseFromParent();
3470   return true;
3471 }
3472 
3473 bool AArch64InstructionSelector::selectSplitVectorUnmerge(
3474     MachineInstr &I, MachineRegisterInfo &MRI) const {
3475   unsigned NumElts = I.getNumOperands() - 1;
3476   Register SrcReg = I.getOperand(NumElts).getReg();
3477   const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
3478   const LLT SrcTy = MRI.getType(SrcReg);
3479 
3480   assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
3481   if (SrcTy.getSizeInBits() > 128) {
3482     LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
3483     return false;
3484   }
3485 
3486   MachineIRBuilder MIB(I);
3487 
3488   // We implement a split vector operation by treating the sub-vectors as
3489   // scalars and extracting them.
3490   const RegisterBank &DstRB =
3491       *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
3492   for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
3493     Register Dst = I.getOperand(OpIdx).getReg();
3494     MachineInstr *Extract =
3495         emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
3496     if (!Extract)
3497       return false;
3498   }
3499   I.eraseFromParent();
3500   return true;
3501 }
3502 
3503 bool AArch64InstructionSelector::selectUnmergeValues(
3504     MachineInstr &I, MachineRegisterInfo &MRI) const {
3505   assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
3506          "unexpected opcode");
3507 
3508   // TODO: Handle unmerging into GPRs and from scalars to scalars.
3509   if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
3510           AArch64::FPRRegBankID ||
3511       RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3512           AArch64::FPRRegBankID) {
3513     LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
3514                          "currently unsupported.\n");
3515     return false;
3516   }
3517 
3518   // The last operand is the vector source register, and every other operand is
3519   // a register to unpack into.
3520   unsigned NumElts = I.getNumOperands() - 1;
3521   Register SrcReg = I.getOperand(NumElts).getReg();
3522   const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
3523   const LLT WideTy = MRI.getType(SrcReg);
3524   (void)WideTy;
3525   assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
3526          "can only unmerge from vector or s128 types!");
3527   assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
3528          "source register size too small!");
3529 
3530   if (!NarrowTy.isScalar())
3531     return selectSplitVectorUnmerge(I, MRI);
3532 
3533   MachineIRBuilder MIB(I);
3534 
3535   // Choose a lane copy opcode and subregister based off of the size of the
3536   // vector's elements.
3537   unsigned CopyOpc = 0;
3538   unsigned ExtractSubReg = 0;
3539   if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
3540     return false;
3541 
3542   // Set up for the lane copies.
3543   MachineBasicBlock &MBB = *I.getParent();
3544 
3545   // Stores the registers we'll be copying from.
3546   SmallVector<Register, 4> InsertRegs;
3547 
3548   // We'll use the first register twice, so we only need NumElts-1 registers.
3549   unsigned NumInsertRegs = NumElts - 1;
3550 
3551   // If our elements fit into exactly 128 bits, then we can copy from the source
3552   // directly. Otherwise, we need to do a bit of setup with some subregister
3553   // inserts.
3554   if (NarrowTy.getSizeInBits() * NumElts == 128) {
3555     InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
3556   } else {
3557     // No. We have to perform subregister inserts. For each insert, create an
3558     // implicit def and a subregister insert, and save the register we create.
3559     for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
3560       Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
3561       MachineInstr &ImpDefMI =
3562           *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
3563                    ImpDefReg);
3564 
3565       // Now, create the subregister insert from SrcReg.
3566       Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
3567       MachineInstr &InsMI =
3568           *BuildMI(MBB, I, I.getDebugLoc(),
3569                    TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
3570                .addUse(ImpDefReg)
3571                .addUse(SrcReg)
3572                .addImm(AArch64::dsub);
3573 
3574       constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
3575       constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
3576 
3577       // Save the register so that we can copy from it after.
3578       InsertRegs.push_back(InsertReg);
3579     }
3580   }
3581 
3582   // Now that we've created any necessary subregister inserts, we can
3583   // create the copies.
3584   //
3585   // Perform the first copy separately as a subregister copy.
3586   Register CopyTo = I.getOperand(0).getReg();
3587   auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
3588                        .addReg(InsertRegs[0], 0, ExtractSubReg);
3589   constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
3590 
3591   // Now, perform the remaining copies as vector lane copies.
3592   unsigned LaneIdx = 1;
3593   for (Register InsReg : InsertRegs) {
3594     Register CopyTo = I.getOperand(LaneIdx).getReg();
3595     MachineInstr &CopyInst =
3596         *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
3597              .addUse(InsReg)
3598              .addImm(LaneIdx);
3599     constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
3600     ++LaneIdx;
3601   }
3602 
3603   // Separately constrain the first copy's destination. Because of the
3604   // limitation in constrainOperandRegClass, we can't guarantee that this will
3605   // actually be constrained. So, do it ourselves using the second operand.
3606   const TargetRegisterClass *RC =
3607       MRI.getRegClassOrNull(I.getOperand(1).getReg());
3608   if (!RC) {
3609     LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
3610     return false;
3611   }
3612 
3613   RBI.constrainGenericRegister(CopyTo, *RC, MRI);
3614   I.eraseFromParent();
3615   return true;
3616 }
3617 
3618 bool AArch64InstructionSelector::selectConcatVectors(
3619     MachineInstr &I, MachineRegisterInfo &MRI) const {
3620   assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
3621          "Unexpected opcode");
3622   Register Dst = I.getOperand(0).getReg();
3623   Register Op1 = I.getOperand(1).getReg();
3624   Register Op2 = I.getOperand(2).getReg();
3625   MachineIRBuilder MIRBuilder(I);
3626   MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
3627   if (!ConcatMI)
3628     return false;
3629   I.eraseFromParent();
3630   return true;
3631 }
3632 
3633 unsigned
3634 AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
3635                                                   MachineFunction &MF) const {
3636   Type *CPTy = CPVal->getType();
3637   Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
3638 
3639   MachineConstantPool *MCP = MF.getConstantPool();
3640   return MCP->getConstantPoolIndex(CPVal, Alignment);
3641 }
3642 
3643 MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
3644     const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
3645   unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());
3646 
3647   auto Adrp =
3648       MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
3649           .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
3650 
3651   MachineInstr *LoadMI = nullptr;
3652   switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
3653   case 16:
3654     LoadMI =
3655         &*MIRBuilder
3656               .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
3657               .addConstantPoolIndex(CPIdx, 0,
3658                                     AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3659     break;
3660   case 8:
3661     LoadMI = &*MIRBuilder
3662                  .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
3663                  .addConstantPoolIndex(
3664                      CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3665     break;
3666   default:
3667     LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
3668                       << *CPVal->getType());
3669     return nullptr;
3670   }
3671   constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
3672   constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
3673   return LoadMI;
3674 }
3675 
3676 /// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
3677 /// size and RB.
3678 static std::pair<unsigned, unsigned>
3679 getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
3680   unsigned Opc, SubregIdx;
3681   if (RB.getID() == AArch64::GPRRegBankID) {
3682     if (EltSize == 32) {
3683       Opc = AArch64::INSvi32gpr;
3684       SubregIdx = AArch64::ssub;
3685     } else if (EltSize == 64) {
3686       Opc = AArch64::INSvi64gpr;
3687       SubregIdx = AArch64::dsub;
3688     } else {
3689       llvm_unreachable("invalid elt size!");
3690     }
3691   } else {
3692     if (EltSize == 8) {
3693       Opc = AArch64::INSvi8lane;
3694       SubregIdx = AArch64::bsub;
3695     } else if (EltSize == 16) {
3696       Opc = AArch64::INSvi16lane;
3697       SubregIdx = AArch64::hsub;
3698     } else if (EltSize == 32) {
3699       Opc = AArch64::INSvi32lane;
3700       SubregIdx = AArch64::ssub;
3701     } else if (EltSize == 64) {
3702       Opc = AArch64::INSvi64lane;
3703       SubregIdx = AArch64::dsub;
3704     } else {
3705       llvm_unreachable("invalid elt size!");
3706     }
3707   }
3708   return std::make_pair(Opc, SubregIdx);
3709 }
3710 
3711 MachineInstr *
3712 AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
3713                                     MachineOperand &RHS,
3714                                     MachineIRBuilder &MIRBuilder) const {
3715   assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
3716   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3717   static const unsigned OpcTable[2][2]{{AArch64::ADDXrr, AArch64::ADDXri},
3718                                        {AArch64::ADDWrr, AArch64::ADDWri}};
3719   bool Is32Bit = MRI.getType(LHS.getReg()).getSizeInBits() == 32;
3720   auto ImmFns = selectArithImmed(RHS);
3721   unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
3722   auto AddMI = MIRBuilder.buildInstr(Opc, {DefReg}, {LHS});
3723 
3724   // If we matched a valid constant immediate, add those operands.
3725   if (ImmFns) {
3726     for (auto &RenderFn : *ImmFns)
3727       RenderFn(AddMI);
3728   } else {
3729     AddMI.addUse(RHS.getReg());
3730   }
3731 
3732   constrainSelectedInstRegOperands(*AddMI, TII, TRI, RBI);
3733   return &*AddMI;
3734 }
3735 
3736 MachineInstr *
3737 AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
3738                                     MachineIRBuilder &MIRBuilder) const {
3739   assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
3740   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3741   static const unsigned OpcTable[2][2]{{AArch64::ADDSXrr, AArch64::ADDSXri},
3742                                        {AArch64::ADDSWrr, AArch64::ADDSWri}};
3743   bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
3744   auto ImmFns = selectArithImmed(RHS);
3745   unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
3746   Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
3747 
3748   auto CmpMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS});
3749 
3750   // If we matched a valid constant immediate, add those operands.
3751   if (ImmFns) {
3752     for (auto &RenderFn : *ImmFns)
3753       RenderFn(CmpMI);
3754   } else {
3755     CmpMI.addUse(RHS.getReg());
3756   }
3757 
3758   constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
3759   return &*CmpMI;
3760 }
3761 
3762 MachineInstr *
3763 AArch64InstructionSelector::emitTST(const Register &LHS, const Register &RHS,
3764                                     MachineIRBuilder &MIRBuilder) const {
3765   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3766   unsigned RegSize = MRI.getType(LHS).getSizeInBits();
3767   bool Is32Bit = (RegSize == 32);
3768   static const unsigned OpcTable[2][2]{{AArch64::ANDSXrr, AArch64::ANDSXri},
3769                                        {AArch64::ANDSWrr, AArch64::ANDSWri}};
3770   Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
3771 
3772   // We might be able to fold in an immediate into the TST. We need to make sure
3773   // it's a logical immediate though, since ANDS requires that.
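  // For example (illustrative): 0xff is a valid logical immediate, so
  // "tst w0, #0xff" can use ANDSWri, whereas 0x1234 is not and falls back to
  // ANDSWrr with the constant materialized in a register.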
3774   auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI);
3775   bool IsImmForm = ValAndVReg.hasValue() &&
3776                    AArch64_AM::isLogicalImmediate(ValAndVReg->Value, RegSize);
3777   unsigned Opc = OpcTable[Is32Bit][IsImmForm];
3778   auto TstMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS});
3779 
3780   if (IsImmForm)
3781     TstMI.addImm(
3782         AArch64_AM::encodeLogicalImmediate(ValAndVReg->Value, RegSize));
3783   else
3784     TstMI.addUse(RHS);
3785 
3786   constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3787   return &*TstMI;
3788 }
3789 
3790 std::pair<MachineInstr *, CmpInst::Predicate>
3791 AArch64InstructionSelector::emitIntegerCompare(
3792     MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
3793     MachineIRBuilder &MIRBuilder) const {
3794   assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
3795   assert(Predicate.isPredicate() && "Expected predicate?");
3796   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3797 
3798   CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
3799 
3800   // Fold the compare if possible.
3801   MachineInstr *FoldCmp =
3802       tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder);
3803   if (FoldCmp)
3804     return {FoldCmp, P};
3805 
3806   // Can't fold into a CMN. Just emit a normal compare.
3807   unsigned CmpOpc = 0;
3808   Register ZReg;
3809 
3810   LLT CmpTy = MRI.getType(LHS.getReg());
3811   assert((CmpTy.isScalar() || CmpTy.isPointer()) &&
3812          "Expected scalar or pointer");
3813   if (CmpTy == LLT::scalar(32)) {
3814     CmpOpc = AArch64::SUBSWrr;
3815     ZReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3816   } else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) {
3817     CmpOpc = AArch64::SUBSXrr;
3818     ZReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3819   } else {
3820     return {nullptr, CmpInst::Predicate::BAD_ICMP_PREDICATE};
3821   }
3822 
3823   // Try to match immediate forms.
3824   MachineInstr *ImmedCmp =
3825       tryOptArithImmedIntegerCompare(LHS, RHS, P, MIRBuilder);
3826   if (ImmedCmp)
3827     return {ImmedCmp, P};
3828 
3829   // If we don't have an immediate, we may have a shift which can be folded
3830   // into the compare.
3831   MachineInstr *ShiftedCmp = tryOptArithShiftedCompare(LHS, RHS, MIRBuilder);
3832   if (ShiftedCmp)
3833     return {ShiftedCmp, P};
3834 
3835   auto CmpMI =
3836       MIRBuilder.buildInstr(CmpOpc, {ZReg}, {LHS.getReg(), RHS.getReg()});
3837   // Make sure that we can constrain the compare that we emitted.
3838   constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
3839   return {&*CmpMI, P};
3840 }
3841 
3842 MachineInstr *AArch64InstructionSelector::emitVectorConcat(
3843     Optional<Register> Dst, Register Op1, Register Op2,
3844     MachineIRBuilder &MIRBuilder) const {
3845   // We implement a vector concat by:
3846   // 1. Use scalar_to_vector to insert the lower vector into the larger dest
3847   // 2. Insert the upper vector into the destination's upper element
3848   // TODO: some of this code is common with G_BUILD_VECTOR handling.
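  // For illustration (hypothetical registers), concatenating two v2s32 values
  // %a and %b into a v4s32 looks like:
  //   %wa:fpr128 = INSERT_SUBREG (IMPLICIT_DEF), %a, %subreg.dsub
  //   %wb:fpr128 = INSERT_SUBREG (IMPLICIT_DEF), %b, %subreg.dsub
  //   %dst = INSvi64lane %wa, 1, %wb, 0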
3849   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3850 
3851   const LLT Op1Ty = MRI.getType(Op1);
3852   const LLT Op2Ty = MRI.getType(Op2);
3853 
3854   if (Op1Ty != Op2Ty) {
3855     LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
3856     return nullptr;
3857   }
3858   assert(Op1Ty.isVector() && "Expected a vector for vector concat");
3859 
3860   if (Op1Ty.getSizeInBits() >= 128) {
3861     LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
3862     return nullptr;
3863   }
3864 
3865   // At the moment we just support 64 bit vector concats.
3866   if (Op1Ty.getSizeInBits() != 64) {
3867     LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
3868     return nullptr;
3869   }
3870 
3871   const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
3872   const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
3873   const TargetRegisterClass *DstRC =
3874       getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
3875 
3876   MachineInstr *WidenedOp1 =
3877       emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
3878   MachineInstr *WidenedOp2 =
3879       emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
3880   if (!WidenedOp1 || !WidenedOp2) {
3881     LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
3882     return nullptr;
3883   }
3884 
3885   // Now do the insert of the upper element.
3886   unsigned InsertOpc, InsSubRegIdx;
3887   std::tie(InsertOpc, InsSubRegIdx) =
3888       getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
3889 
3890   if (!Dst)
3891     Dst = MRI.createVirtualRegister(DstRC);
3892   auto InsElt =
3893       MIRBuilder
3894           .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
3895           .addImm(1) /* Lane index */
3896           .addUse(WidenedOp2->getOperand(0).getReg())
3897           .addImm(0);
3898   constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
3899   return &*InsElt;
3900 }
3901 
3902 MachineInstr *AArch64InstructionSelector::emitFMovForFConstant(
3903     MachineInstr &I, MachineRegisterInfo &MRI) const {
3904   assert(I.getOpcode() == TargetOpcode::G_FCONSTANT &&
3905          "Expected a G_FCONSTANT!");
3906   MachineOperand &ImmOp = I.getOperand(1);
3907   unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();
3908 
3909   // Only handle 32 and 64 bit defs for now.
3910   if (DefSize != 32 && DefSize != 64)
3911     return nullptr;
3912 
3913   // Don't handle null values using FMOV.
3914   if (ImmOp.getFPImm()->isNullValue())
3915     return nullptr;
3916 
3917   // Get the immediate representation for the FMOV.
3918   const APFloat &ImmValAPF = ImmOp.getFPImm()->getValueAPF();
3919   int Imm = DefSize == 32 ? AArch64_AM::getFP32Imm(ImmValAPF)
3920                           : AArch64_AM::getFP64Imm(ImmValAPF);
3921 
3922   // If this is -1, it means the immediate can't be represented as the requested
3923   // floating point value. Bail.
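  // E.g. (illustrative) 2.0 is encodable as an 8-bit FP immediate and can use
  // FMOV, while 0.1 is not and has to be materialized another way.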
3924   if (Imm == -1)
3925     return nullptr;
3926 
3927   // Update MI to represent the new FMOV instruction, constrain it, and return.
3928   ImmOp.ChangeToImmediate(Imm);
3929   unsigned MovOpc = DefSize == 32 ? AArch64::FMOVSi : AArch64::FMOVDi;
3930   I.setDesc(TII.get(MovOpc));
3931   constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3932   return &I;
3933 }
3934 
3935 MachineInstr *
3936 AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
3937                                             MachineIRBuilder &MIRBuilder) const {
3938   // CSINC increments the result when the predicate is false. Invert it.
3939   const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
3940       CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
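  // For example (illustrative): for an eq compare we emit
  //   CSINCWr %def, wzr, wzr, ne
  // which yields 1 when the comparison was equal and 0 otherwise.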
3941   auto I = MIRBuilder
3942                .buildInstr(AArch64::CSINCWr, {DefReg},
3943                            {Register(AArch64::WZR), Register(AArch64::WZR)})
3944                .addImm(InvCC);
3945   constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
3946   return &*I;
3947 }
3948 
3949 bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
3950   MachineIRBuilder MIB(I);
3951   MachineRegisterInfo &MRI = *MIB.getMRI();
3952   const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
3953 
3954   // We want to recognize this pattern:
3955   //
3956   // $z = G_FCMP pred, $x, $y
3957   // ...
3958   // $w = G_SELECT $z, $a, $b
3959   //
3960   // Where the value of $z is *only* ever used by the G_SELECT (possibly with
3961   // some copies/truncs in between).
3962   //
3963   // If we see this, then we can emit something like this:
3964   //
3965   // fcmp $x, $y
3966   // fcsel $w, $a, $b, pred
3967   //
3968   // Rather than emitting both of the rather long sequences in the standard
3969   // G_FCMP/G_SELECT select methods.
3970 
3971   // First, check if the condition is defined by a compare.
3972   MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
3973   while (CondDef) {
3974     // We can only fold if all of the defs have one use.
3975     Register CondDefReg = CondDef->getOperand(0).getReg();
3976     if (!MRI.hasOneNonDBGUse(CondDefReg)) {
3977       // Unless it's another select.
3978       for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
3979         if (CondDef == &UI)
3980           continue;
3981         if (UI.getOpcode() != TargetOpcode::G_SELECT)
3982           return false;
3983       }
3984     }
3985 
3986     // We can skip over G_TRUNC since the condition is 1-bit.
3987     // Truncating/extending can have no impact on the value.
3988     unsigned Opc = CondDef->getOpcode();
3989     if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
3990       break;
3991 
3992     // Can't see past copies from physregs.
3993     if (Opc == TargetOpcode::COPY &&
3994         Register::isPhysicalRegister(CondDef->getOperand(1).getReg()))
3995       return false;
3996 
3997     CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
3998   }
3999 
4000   // Is the condition defined by a compare?
4001   if (!CondDef)
4002     return false;
4003 
4004   unsigned CondOpc = CondDef->getOpcode();
4005   if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
4006     return false;
4007 
4008   AArch64CC::CondCode CondCode;
4009   if (CondOpc == TargetOpcode::G_ICMP) {
4010     MachineInstr *Cmp;
4011     CmpInst::Predicate Pred;
4012 
4013     std::tie(Cmp, Pred) =
4014         emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
4015                            CondDef->getOperand(1), MIB);
4016 
4017     if (!Cmp) {
4018       LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
4019       return false;
4020     }
4021 
4022     // Have to collect the CondCode after emitIntegerCompare, since it can
4023     // update the predicate.
4024     CondCode = changeICMPPredToAArch64CC(Pred);
4025   } else {
4026     // Get the condition code for the select.
4027     AArch64CC::CondCode CondCode2;
4028     changeFCMPPredToAArch64CC(
4029         (CmpInst::Predicate)CondDef->getOperand(1).getPredicate(), CondCode,
4030         CondCode2);
4031 
4032     // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
4033     // instructions to emit the comparison.
4034     // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
4035     // unnecessary.
4036     if (CondCode2 != AArch64CC::AL)
4037       return false;
4038 
4039     // Make sure we'll be able to select the compare.
4040     unsigned CmpOpc = selectFCMPOpc(*CondDef, MRI);
4041     if (!CmpOpc)
4042       return false;
4043 
4044     // Emit a new compare.
4045     auto Cmp = MIB.buildInstr(CmpOpc, {}, {CondDef->getOperand(2).getReg()});
4046     if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
4047       Cmp.addUse(CondDef->getOperand(3).getReg());
4048     constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
4049   }
4050 
4051   // Emit the select.
4052   unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
4053   auto CSel =
4054       MIB.buildInstr(CSelOpc, {I.getOperand(0).getReg()},
4055                      {I.getOperand(2).getReg(), I.getOperand(3).getReg()})
4056           .addImm(CondCode);
4057   constrainSelectedInstRegOperands(*CSel, TII, TRI, RBI);
4058   I.eraseFromParent();
4059   return true;
4060 }
4061 
4062 MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
4063     MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4064     MachineIRBuilder &MIRBuilder) const {
4065   assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
4066          "Unexpected MachineOperand");
4067   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4068   // We want to find this sort of thing:
4069   // x = G_SUB 0, y
4070   // G_ICMP z, x
4071   //
4072   // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
4073   // e.g:
4074   //
4075   // cmn z, y
4076 
4077   // Helper lambda to detect the subtract followed by the compare.
4078   // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0.
4079   auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) {
4080     if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB)
4081       return false;
4082 
4083     // Need to make sure NZCV is the same at the end of the transformation.
4084     if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
4085       return false;
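    // (CMN sets flags as if computing LHS + y rather than LHS - (0 - y); the
    // carry and overflow flags may differ between the two, so only the
    // Z-flag-based predicates EQ and NE are safe to fold.)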
4086 
4087     // We want to match against SUBs.
4088     if (DefMI->getOpcode() != TargetOpcode::G_SUB)
4089       return false;
4090 
4091     // Make sure that we're getting
4092     // x = G_SUB 0, y
4093     auto ValAndVReg =
4094         getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI);
4095     if (!ValAndVReg || ValAndVReg->Value != 0)
4096       return false;
4097 
4098     // This can safely be represented as a CMN.
4099     return true;
4100   };
4101 
4102   // Check if the RHS or LHS of the G_ICMP is defined by a SUB
4103   MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
4104   MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
4105   CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
4106   const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(P);
4107 
4108   // Given this:
4109   //
4110   // x = G_SUB 0, y
4111   // G_ICMP x, z
4112   //
4113   // Produce this:
4114   //
4115   // cmn y, z
4116   if (IsCMN(LHSDef, CC))
4117     return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
4118 
4119   // Same idea here, but with the RHS of the compare instead:
4120   //
4121   // Given this:
4122   //
4123   // x = G_SUB 0, y
4124   // G_ICMP z, x
4125   //
4126   // Produce this:
4127   //
4128   // cmn z, y
4129   if (IsCMN(RHSDef, CC))
4130     return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
4131 
4132   // Given this:
4133   //
4134   // z = G_AND x, y
4135   // G_ICMP z, 0
4136   //
4137   // Produce this if the compare is signed:
4138   //
4139   // tst x, y
4140   if (!isUnsignedICMPPred(P) && LHSDef &&
4141       LHSDef->getOpcode() == TargetOpcode::G_AND) {
4142     // Make sure that the RHS is 0.
4143     auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
4144     if (!ValAndVReg || ValAndVReg->Value != 0)
4145       return nullptr;
4146 
4147     return emitTST(LHSDef->getOperand(1).getReg(),
4148                    LHSDef->getOperand(2).getReg(), MIRBuilder);
4149   }
4150 
4151   return nullptr;
4152 }
4153 
4154 MachineInstr *AArch64InstructionSelector::tryOptArithImmedIntegerCompare(
4155     MachineOperand &LHS, MachineOperand &RHS, CmpInst::Predicate &P,
4156     MachineIRBuilder &MIB) const {
4157   // Attempt to select the immediate form of an integer compare.
4158   MachineRegisterInfo &MRI = *MIB.getMRI();
4159   auto Ty = MRI.getType(LHS.getReg());
4160   assert(!Ty.isVector() && "Expected scalar or pointer only?");
4161   unsigned Size = Ty.getSizeInBits();
4162   assert((Size == 32 || Size == 64) &&
4163          "Expected 32 bit or 64 bit compare only?");
4164 
4165   // Check if this is a case we can already handle.
4166   InstructionSelector::ComplexRendererFns ImmFns;
4167   ImmFns = selectArithImmed(RHS);
4168 
4169   if (!ImmFns) {
4170     // We didn't get a rendering function, but we may still have a constant.
4171     auto MaybeImmed = getImmedFromMO(RHS);
4172     if (!MaybeImmed)
4173       return nullptr;
4174 
4175     // We have a constant, but it doesn't fit. Try adjusting it by one and
4176     // updating the predicate if possible.
4177     uint64_t C = *MaybeImmed;
4178     CmpInst::Predicate NewP;
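    // e.g. "icmp ult x, 4097" has no valid arithmetic immediate encoding, but
    // the equivalent "icmp ule x, 4096" does (4096 is 1 shifted left by 12).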
4179     switch (P) {
4180     default:
4181       return nullptr;
4182     case CmpInst::ICMP_SLT:
4183     case CmpInst::ICMP_SGE:
4184       // Check for
4185       //
4186       // x slt c => x sle c - 1
4187       // x sge c => x sgt c - 1
4188       //
4189       // When c is not the smallest possible negative number.
4190       if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) ||
4191           (Size == 32 && static_cast<int32_t>(C) == INT32_MIN))
4192         return nullptr;
4193       NewP = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
4194       C -= 1;
4195       break;
4196     case CmpInst::ICMP_ULT:
4197     case CmpInst::ICMP_UGE:
4198       // Check for
4199       //
4200       // x ult c => x ule c - 1
4201       // x uge c => x ugt c - 1
4202       //
4203       // When c is not zero.
4204       if (C == 0)
4205         return nullptr;
4206       NewP = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
4207       C -= 1;
4208       break;
4209     case CmpInst::ICMP_SLE:
4210     case CmpInst::ICMP_SGT:
4211       // Check for
4212       //
4213       // x sle c => x slt c + 1
4214     // x sgt c => x sge c + 1
4215       //
4216       // When c is not the largest possible signed integer.
4217       if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) ||
4218           (Size == 64 && static_cast<int64_t>(C) == INT64_MAX))
4219         return nullptr;
4220       NewP = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
4221       C += 1;
4222       break;
4223     case CmpInst::ICMP_ULE:
4224     case CmpInst::ICMP_UGT:
4225       // Check for
4226       //
4227       // x ule c => x ult c + 1
4228     // x ugt c => x uge c + 1
4229       //
4230       // When c is not the largest possible unsigned integer.
4231       if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) ||
4232           (Size == 64 && C == UINT64_MAX))
4233         return nullptr;
4234       NewP = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
4235       C += 1;
4236       break;
4237     }
4238 
4239     // Check if the new constant is valid.
4240     if (Size == 32)
4241       C = static_cast<uint32_t>(C);
4242     ImmFns = select12BitValueWithLeftShift(C);
4243     if (!ImmFns)
4244       return nullptr;
4245     P = NewP;
4246   }
4247 
4248   // At this point, we know we can select an immediate form. Go ahead and do
4249   // that.
4250   Register ZReg;
4251   unsigned Opc;
4252   if (Size == 32) {
4253     ZReg = AArch64::WZR;
4254     Opc = AArch64::SUBSWri;
4255   } else {
4256     ZReg = AArch64::XZR;
4257     Opc = AArch64::SUBSXri;
4258   }
4259 
4260   auto CmpMI = MIB.buildInstr(Opc, {ZReg}, {LHS.getReg()});
4261   for (auto &RenderFn : *ImmFns)
4262     RenderFn(CmpMI);
4263   constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4264   return &*CmpMI;
4265 }
4266 
4267 MachineInstr *AArch64InstructionSelector::tryOptArithShiftedCompare(
4268     MachineOperand &LHS, MachineOperand &RHS, MachineIRBuilder &MIB) const {
4269   // We are looking for the following pattern:
4270   //
4271   // shift = G_SHL/G_ASHR/G_LSHR y, c
4272   // ...
4273   // cmp = G_ICMP pred, something, shift
4274   //
4275   // Since we will select the G_ICMP to a SUBS, we can potentially fold the
4276   // shift into the subtract.
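  //
  // e.g. "%shift = G_SHL %y, 2; %cmp = G_ICMP eq %x, %shift" can be selected
  // as "subs wzr, wX, wY, lsl #2".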
4277   static const unsigned OpcTable[2] = {AArch64::SUBSWrs, AArch64::SUBSXrs};
4278   static const Register ZRegTable[2] = {AArch64::WZR, AArch64::XZR};
4279   auto ImmFns = selectShiftedRegister(RHS);
4280   if (!ImmFns)
4281     return nullptr;
4282   MachineRegisterInfo &MRI = *MIB.getMRI();
4283   auto Ty = MRI.getType(LHS.getReg());
4284   assert(!Ty.isVector() && "Expected scalar or pointer only?");
4285   unsigned Size = Ty.getSizeInBits();
4286   bool Idx = (Size == 64);
4287   Register ZReg = ZRegTable[Idx];
4288   unsigned Opc = OpcTable[Idx];
4289   auto CmpMI = MIB.buildInstr(Opc, {ZReg}, {LHS.getReg()});
4290   for (auto &RenderFn : *ImmFns)
4291     RenderFn(CmpMI);
4292   constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4293   return &*CmpMI;
4294 }
4295 
4296 bool AArch64InstructionSelector::selectShuffleVector(
4297     MachineInstr &I, MachineRegisterInfo &MRI) const {
4298   const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
4299   Register Src1Reg = I.getOperand(1).getReg();
4300   const LLT Src1Ty = MRI.getType(Src1Reg);
4301   Register Src2Reg = I.getOperand(2).getReg();
4302   const LLT Src2Ty = MRI.getType(Src2Reg);
4303   ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
4304 
4305   MachineBasicBlock &MBB = *I.getParent();
4306   MachineFunction &MF = *MBB.getParent();
4307   LLVMContext &Ctx = MF.getFunction().getContext();
4308 
4309   // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
4310   // it originated from a <1 x T> type. Those should have been lowered into
4311   // G_BUILD_VECTOR earlier.
4312   if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
4313     LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
4314     return false;
4315   }
4316 
4317   unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
4318 
4319   SmallVector<Constant *, 64> CstIdxs;
4320   for (int Val : Mask) {
4321     // For now, we'll just assume any undef index is 0. This should be
4322     // optimized in the future, e.g. to select DUP etc.
4323     Val = Val < 0 ? 0 : Val;
4324     for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
4325       unsigned Offset = Byte + Val * BytesPerElt;
4326       CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
4327     }
4328   }
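  // e.g. for <4 x s32> sources with mask <0, 4, 1, 5>, this produces the byte
  // indices 0-3, 16-19, 4-7 and 20-23 into the concatenated 32-byte table.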
4329 
4330   MachineIRBuilder MIRBuilder(I);
4331 
4332   // Use a constant pool to load the index vector for TBL.
4333   Constant *CPVal = ConstantVector::get(CstIdxs);
4334   MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
4335   if (!IndexLoad) {
4336     LLVM_DEBUG(dbgs() << "Could not load from a constant pool\n");
4337     return false;
4338   }
4339 
4340   if (DstTy.getSizeInBits() != 128) {
4341     assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
4342     // This case can be done with TBL1.
4343     MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
4344     if (!Concat) {
4345       LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1\n");
4346       return false;
4347     }
4348 
4349     // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
4350     IndexLoad =
4351         emitScalarToVector(64, &AArch64::FPR128RegClass,
4352                            IndexLoad->getOperand(0).getReg(), MIRBuilder);
4353 
4354     auto TBL1 = MIRBuilder.buildInstr(
4355         AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
4356         {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
4357     constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
4358 
4359     auto Copy =
4360         MIRBuilder
4361             .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
4362             .addReg(TBL1.getReg(0), 0, AArch64::dsub);
4363     RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
4364     I.eraseFromParent();
4365     return true;
4366   }
4367 
4368   // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
4369   // Q registers for regalloc.
4370   auto RegSeq = MIRBuilder
4371                     .buildInstr(TargetOpcode::REG_SEQUENCE,
4372                                 {&AArch64::QQRegClass}, {Src1Reg})
4373                     .addImm(AArch64::qsub0)
4374                     .addUse(Src2Reg)
4375                     .addImm(AArch64::qsub1);
4376 
4377   auto TBL2 = MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
4378                                     {RegSeq, IndexLoad->getOperand(0)});
4379   constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
4380   constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
4381   I.eraseFromParent();
4382   return true;
4383 }
4384 
4385 MachineInstr *AArch64InstructionSelector::emitLaneInsert(
4386     Optional<Register> DstReg, Register SrcReg, Register EltReg,
4387     unsigned LaneIdx, const RegisterBank &RB,
4388     MachineIRBuilder &MIRBuilder) const {
4389   MachineInstr *InsElt = nullptr;
4390   const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
4391   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4392 
4393   // Create a register to define with the insert if one wasn't passed in.
4394   if (!DstReg)
4395     DstReg = MRI.createVirtualRegister(DstRC);
4396 
4397   unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
4398   unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
4399 
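  // On the FPR bank the element is first placed into lane 0 of a temporary
  // vector via emitScalarToVector, and an element-to-element INS then copies
  // that lane 0 into LaneIdx of the destination (hence the extra source-lane
  // immediate of 0 below). On the GPR bank the element register can be used
  // directly.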
4400   if (RB.getID() == AArch64::FPRRegBankID) {
4401     auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
4402     InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
4403                  .addImm(LaneIdx)
4404                  .addUse(InsSub->getOperand(0).getReg())
4405                  .addImm(0);
4406   } else {
4407     InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
4408                  .addImm(LaneIdx)
4409                  .addUse(EltReg);
4410   }
4411 
4412   constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4413   return InsElt;
4414 }
4415 
4416 bool AArch64InstructionSelector::selectInsertElt(
4417     MachineInstr &I, MachineRegisterInfo &MRI) const {
4418   assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
4419 
4420   // Get information on the destination.
4421   Register DstReg = I.getOperand(0).getReg();
4422   const LLT DstTy = MRI.getType(DstReg);
4423   unsigned VecSize = DstTy.getSizeInBits();
4424 
4425   // Get information on the element we want to insert into the destination.
4426   Register EltReg = I.getOperand(2).getReg();
4427   const LLT EltTy = MRI.getType(EltReg);
4428   unsigned EltSize = EltTy.getSizeInBits();
4429   if (EltSize < 16 || EltSize > 64)
4430     return false; // Don't support all element types yet.
4431 
4432   // Find the definition of the index. Bail out if it's not defined by a
4433   // G_CONSTANT.
4434   Register IdxReg = I.getOperand(3).getReg();
4435   auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
4436   if (!VRegAndVal)
4437     return false;
4438   unsigned LaneIdx = VRegAndVal->Value;
4439 
4440   // Perform the lane insert.
4441   Register SrcReg = I.getOperand(1).getReg();
4442   const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
4443   MachineIRBuilder MIRBuilder(I);
4444 
4445   if (VecSize < 128) {
4446     // If the vector we're inserting into is smaller than 128 bits, widen it
4447     // to 128 to do the insert.
4448     MachineInstr *ScalarToVec = emitScalarToVector(
4449         VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
4450     if (!ScalarToVec)
4451       return false;
4452     SrcReg = ScalarToVec->getOperand(0).getReg();
4453   }
4454 
4455   // Create an insert into a new FPR128 register.
4456   // Note that if our vector is already 128 bits, we end up emitting an extra
4457   // register.
4458   MachineInstr *InsMI =
4459       emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);
4460 
4461   if (VecSize < 128) {
4462     // If we had to widen to perform the insert, then we have to demote back to
4463     // the original size to get the result we want.
4464     Register DemoteVec = InsMI->getOperand(0).getReg();
4465     const TargetRegisterClass *RC =
4466         getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
4467     if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
4468       LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
4469       return false;
4470     }
4471     unsigned SubReg = 0;
4472     if (!getSubRegForClass(RC, TRI, SubReg))
4473       return false;
4474     if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
4475       LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
4476                         << ")\n");
4477       return false;
4478     }
4479     MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
4480         .addReg(DemoteVec, 0, SubReg);
4481     RBI.constrainGenericRegister(DstReg, *RC, MRI);
4482   } else {
4483     // No widening needed.
4484     InsMI->getOperand(0).setReg(DstReg);
4485     constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
4486   }
4487 
4488   I.eraseFromParent();
4489   return true;
4490 }
4491 
4492 bool AArch64InstructionSelector::tryOptConstantBuildVec(
4493     MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) const {
4494   assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4495   assert(DstTy.getSizeInBits() <= 128 && "Unexpected build_vec type!");
4496   if (DstTy.getSizeInBits() < 32)
4497     return false;
4498   // Check if we're building a constant vector, in which case we want to
4499   // generate a constant pool load instead of a vector insert sequence.
4500   SmallVector<Constant *, 16> Csts;
4501   for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
4502     // Try to find G_CONSTANT or G_FCONSTANT
4503     auto *OpMI =
4504         getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
4505     if (OpMI)
4506       Csts.emplace_back(
4507           const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
4508     else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
4509                                   I.getOperand(Idx).getReg(), MRI)))
4510       Csts.emplace_back(
4511           const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
4512     else
4513       return false;
4514   }
4515   Constant *CV = ConstantVector::get(Csts);
4516   MachineIRBuilder MIB(I);
4517   auto *CPLoad = emitLoadFromConstantPool(CV, MIB);
4518   if (!CPLoad) {
4519     LLVM_DEBUG(dbgs() << "Could not generate cp load for build_vector\n");
4520     return false;
4521   }
4522   MIB.buildCopy(I.getOperand(0), CPLoad->getOperand(0));
4523   RBI.constrainGenericRegister(I.getOperand(0).getReg(),
4524                                *MRI.getRegClass(CPLoad->getOperand(0).getReg()),
4525                                MRI);
4526   I.eraseFromParent();
4527   return true;
4528 }
4529 
4530 bool AArch64InstructionSelector::selectBuildVector(
4531     MachineInstr &I, MachineRegisterInfo &MRI) const {
4532   assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4533   // Until we port more of the optimized selections, for now just use a vector
4534   // insert sequence.
4535   const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
4536   const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
4537   unsigned EltSize = EltTy.getSizeInBits();
4538 
4539   if (tryOptConstantBuildVec(I, DstTy, MRI))
4540     return true;
4541   if (EltSize < 16 || EltSize > 64)
4542     return false; // Don't support all element types yet.
4543   const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
4544   MachineIRBuilder MIRBuilder(I);
4545 
4546   const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
4547   MachineInstr *ScalarToVec =
4548       emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
4549                          I.getOperand(1).getReg(), MIRBuilder);
4550   if (!ScalarToVec)
4551     return false;
4552 
4553   Register DstVec = ScalarToVec->getOperand(0).getReg();
4554   unsigned DstSize = DstTy.getSizeInBits();
4555 
4556   // Keep track of the last MI we inserted. Later on, we might be able to save
4557   // a copy using it.
4558   MachineInstr *PrevMI = nullptr;
4559   for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
4560     // Note that if we don't do a subregister copy, we can end up making an
4561     // extra register.
4562     PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
4563                               MIRBuilder);
4564     DstVec = PrevMI->getOperand(0).getReg();
4565   }
4566 
4567   // If DstTy's size in bits is less than 128, then emit a subregister copy
4568   // from DstVec to the last register we've defined.
4569   if (DstSize < 128) {
4570     // Force this to be FPR using the destination vector.
4571     const TargetRegisterClass *RC =
4572         getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
4573     if (!RC)
4574       return false;
4575     if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
4576       LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
4577       return false;
4578     }
4579 
4580     unsigned SubReg = 0;
4581     if (!getSubRegForClass(RC, TRI, SubReg))
4582       return false;
4583     if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
4584       LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
4585                         << ")\n");
4586       return false;
4587     }
4588 
4589     Register Reg = MRI.createVirtualRegister(RC);
4590     Register DstReg = I.getOperand(0).getReg();
4591 
4592     MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
4593         .addReg(DstVec, 0, SubReg);
4594     MachineOperand &RegOp = I.getOperand(1);
4595     RegOp.setReg(Reg);
4596     RBI.constrainGenericRegister(DstReg, *RC, MRI);
4597   } else {
4598     // We don't need a subregister copy. Save a copy by re-using the
4599     // destination register on the final insert.
4600     assert(PrevMI && "PrevMI was null?");
4601     PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
4602     constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
4603   }
4604 
4605   I.eraseFromParent();
4606   return true;
4607 }
4608 
4609 /// Helper function to find an intrinsic ID on a MachineInstr. Returns the
4610 /// ID if it exists, and 0 otherwise.
4611 static unsigned findIntrinsicID(MachineInstr &I) {
4612   auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
4613     return Op.isIntrinsicID();
4614   });
4615   if (IntrinOp == I.operands_end())
4616     return 0;
4617   return IntrinOp->getIntrinsicID();
4618 }
4619 
4620 bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
4621     MachineInstr &I, MachineRegisterInfo &MRI) const {
4622   // Find the intrinsic ID.
4623   unsigned IntrinID = findIntrinsicID(I);
4624   if (!IntrinID)
4625     return false;
4626   MachineIRBuilder MIRBuilder(I);
4627 
4628   // Select the instruction.
4629   switch (IntrinID) {
4630   default:
4631     return false;
4632   case Intrinsic::trap:
4633     MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
4634     break;
4635   case Intrinsic::debugtrap:
4636     if (!STI.isTargetWindows())
4637       return false;
4638     MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
4639     break;
4640   }
4641 
4642   I.eraseFromParent();
4643   return true;
4644 }
4645 
4646 bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
4647                                                  MachineRegisterInfo &MRI) {
4648   unsigned IntrinID = findIntrinsicID(I);
4649   if (!IntrinID)
4650     return false;
4651   MachineIRBuilder MIRBuilder(I);
4652 
4653   switch (IntrinID) {
4654   default:
4655     break;
4656   case Intrinsic::aarch64_crypto_sha1h: {
4657     Register DstReg = I.getOperand(0).getReg();
4658     Register SrcReg = I.getOperand(2).getReg();
4659 
4660     // FIXME: Should this be an assert?
4661     if (MRI.getType(DstReg).getSizeInBits() != 32 ||
4662         MRI.getType(SrcReg).getSizeInBits() != 32)
4663       return false;
4664 
4665     // The operation has to happen on FPRs. Set up some new FPR registers for
4666     // the source and destination if they are on GPRs.
4667     if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4668       SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
4669       MIRBuilder.buildCopy({SrcReg}, {I.getOperand(2)});
4670 
4671       // Make sure the copy ends up getting constrained properly.
4672       RBI.constrainGenericRegister(I.getOperand(2).getReg(),
4673                                    AArch64::GPR32RegClass, MRI);
4674     }
4675 
4676     if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
4677       DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
4678 
4679     // Actually insert the instruction.
4680     auto SHA1Inst = MIRBuilder.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
4681     constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
4682 
4683     // Did we create a new register for the destination?
4684     if (DstReg != I.getOperand(0).getReg()) {
4685       // Yep. Copy the result of the instruction back into the original
4686       // destination.
4687       MIRBuilder.buildCopy({I.getOperand(0)}, {DstReg});
4688       RBI.constrainGenericRegister(I.getOperand(0).getReg(),
4689                                    AArch64::GPR32RegClass, MRI);
4690     }
4691 
4692     I.eraseFromParent();
4693     return true;
4694   }
4695   case Intrinsic::frameaddress:
4696   case Intrinsic::returnaddress: {
4697     MachineFunction &MF = *I.getParent()->getParent();
4698     MachineFrameInfo &MFI = MF.getFrameInfo();
4699 
4700     unsigned Depth = I.getOperand(2).getImm();
4701     Register DstReg = I.getOperand(0).getReg();
4702     RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
4703 
4704     if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
4705       if (MFReturnAddr) {
4706         MIRBuilder.buildCopy({DstReg}, MFReturnAddr);
4707         I.eraseFromParent();
4708         return true;
4709       }
4710       MFI.setReturnAddressIsTaken(true);
4711       MF.addLiveIn(AArch64::LR, &AArch64::GPR64spRegClass);
4712       // Insert the copy from LR/X30 into the entry block, before it can be
4713       // clobbered by anything.
4714       MachineBasicBlock &EntryBlock = *MF.begin();
4715       if (!EntryBlock.isLiveIn(AArch64::LR))
4716         EntryBlock.addLiveIn(AArch64::LR);
4717       MachineIRBuilder EntryBuilder(MF);
4718       EntryBuilder.setInstr(*EntryBlock.begin());
4719       EntryBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
4720       MFReturnAddr = DstReg;
4721       I.eraseFromParent();
4722       return true;
4723     }
4724 
4725     MFI.setFrameAddressIsTaken(true);
4726     Register FrameAddr(AArch64::FP);
4727     while (Depth--) {
4728       Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
4729       auto Ldr =
4730           MIRBuilder.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr})
4731               .addImm(0);
4732       constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);
4733       FrameAddr = NextFrame;
4734     }
4735 
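    // For returnaddress, the saved LR sits in the frame record just above the
    // saved FP, so the load below uses a scaled offset of 1 (i.e. 8 bytes).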
4736     if (IntrinID == Intrinsic::frameaddress)
4737       MIRBuilder.buildCopy({DstReg}, {FrameAddr});
4738     else {
4739       MFI.setReturnAddressIsTaken(true);
4740       MIRBuilder.buildInstr(AArch64::LDRXui, {DstReg}, {FrameAddr}).addImm(1);
4741     }
4742 
4743     I.eraseFromParent();
4744     return true;
4745   }
4746   }
4747   return false;
4748 }
4749 
4750 InstructionSelector::ComplexRendererFns
4751 AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
4752   auto MaybeImmed = getImmedFromMO(Root);
4753   if (MaybeImmed == None || *MaybeImmed > 31)
4754     return None;
4755   uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
4756   return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4757 }
4758 
4759 InstructionSelector::ComplexRendererFns
4760 AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
4761   auto MaybeImmed = getImmedFromMO(Root);
4762   if (MaybeImmed == None || *MaybeImmed > 31)
4763     return None;
4764   uint64_t Enc = 31 - *MaybeImmed;
4765   return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4766 }
4767 
4768 InstructionSelector::ComplexRendererFns
4769 AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
4770   auto MaybeImmed = getImmedFromMO(Root);
4771   if (MaybeImmed == None || *MaybeImmed > 63)
4772     return None;
4773   uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
4774   return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4775 }
4776 
4777 InstructionSelector::ComplexRendererFns
4778 AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
4779   auto MaybeImmed = getImmedFromMO(Root);
4780   if (MaybeImmed == None || *MaybeImmed > 63)
4781     return None;
4782   uint64_t Enc = 63 - *MaybeImmed;
4783   return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4784 }
4785 
4786 /// Helper to select an immediate value that can be represented as a 12-bit
4787 /// value shifted left by either 0 or 12. If it is possible to do so, return
4788 /// the immediate and shift value. If not, return None.
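/// For example, 0x123 is rendered as (0x123, lsl #0) and 0x123000 as
/// (0x123, lsl #12), while 0x123456 cannot be represented and yields None.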
4789 ///
4790 /// Used by selectArithImmed and selectNegArithImmed.
4791 InstructionSelector::ComplexRendererFns
4792 AArch64InstructionSelector::select12BitValueWithLeftShift(
4793     uint64_t Immed) const {
4794   unsigned ShiftAmt;
4795   if (Immed >> 12 == 0) {
4796     ShiftAmt = 0;
4797   } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
4798     ShiftAmt = 12;
4799     Immed = Immed >> 12;
4800   } else
4801     return None;
4802 
4803   unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
4804   return {{
4805       [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
4806       [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
4807   }};
4808 }
4809 
4810 /// SelectArithImmed - Select an immediate value that can be represented as
4811 /// a 12-bit value shifted left by either 0 or 12.  If so, return true with
4812 /// Val set to the 12-bit value and Shift set to the shifter operand.
4813 InstructionSelector::ComplexRendererFns
4814 AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
4815   // This function is called from the addsub_shifted_imm ComplexPattern,
4816   // which lists [imm] as the list of opcodes it's interested in. However,
4817   // we still need to check whether the operand is actually an immediate
4818   // here because the ComplexPattern opcode list is only used in
4819   // root-level opcode matching.
4820   auto MaybeImmed = getImmedFromMO(Root);
4821   if (MaybeImmed == None)
4822     return None;
4823   return select12BitValueWithLeftShift(*MaybeImmed);
4824 }
4825 
4826 /// SelectNegArithImmed - As above, but negates the value before trying to
4827 /// select it.
4828 InstructionSelector::ComplexRendererFns
4829 AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
4830   // We need a register here, because we need to know if we have a 64 or 32
4831   // bit immediate.
4832   if (!Root.isReg())
4833     return None;
4834   auto MaybeImmed = getImmedFromMO(Root);
4835   if (MaybeImmed == None)
4836     return None;
4837   uint64_t Immed = *MaybeImmed;
4838 
4839   // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
4840   // have the opposite effect on the C flag, so this pattern mustn't match under
4841   // those circumstances.
4842   if (Immed == 0)
4843     return None;
4844 
4845   // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
4846   // the root.
4847   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4848   if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
4849     Immed = ~((uint32_t)Immed) + 1;
4850   else
4851     Immed = ~Immed + 1ULL;
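  // e.g. an immediate of -5 negates to 5, which fits in 12 bits; this lets
  // "cmp wN, #-5" be selected as "cmn wN, #5".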
4852 
4853   if (Immed & 0xFFFFFFFFFF000000ULL)
4854     return None;
4855 
4856   Immed &= 0xFFFFFFULL;
4857   return select12BitValueWithLeftShift(Immed);
4858 }
4859 
4860 /// Return true if it is worth folding MI into an extended register. That is,
4861 /// if it's safe to pull it into the addressing mode of a load or store as a
4862 /// shift.
4863 bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
4864     MachineInstr &MI, const MachineRegisterInfo &MRI) const {
4865   // Always fold if there is one use, or if we're optimizing for size.
4866   Register DefReg = MI.getOperand(0).getReg();
4867   if (MRI.hasOneNonDBGUse(DefReg) ||
4868       MI.getParent()->getParent()->getFunction().hasMinSize())
4869     return true;
4870 
4871   // It's better to avoid folding and recomputing shifts when we don't have a
4872   // fastpath.
4873   if (!STI.hasLSLFast())
4874     return false;
4875 
4876   // We have a fastpath, so folding a shift in and potentially computing it
4877   // many times may be beneficial. Check if this is only used in memory ops.
4878   // If it is, then we should fold.
4879   return all_of(MRI.use_nodbg_instructions(DefReg),
4880                 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
4881 }
4882 
4883 static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) {
4884   switch (Type) {
4885   case AArch64_AM::SXTB:
4886   case AArch64_AM::SXTH:
4887   case AArch64_AM::SXTW:
4888     return true;
4889   default:
4890     return false;
4891   }
4892 }
4893 
4894 InstructionSelector::ComplexRendererFns
4895 AArch64InstructionSelector::selectExtendedSHL(
4896     MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
4897     unsigned SizeInBytes, bool WantsExt) const {
4898   assert(Base.isReg() && "Expected base to be a register operand");
4899   assert(Offset.isReg() && "Expected offset to be a register operand");
4900 
4901   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4902   MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
4903   if (!OffsetInst)
4904     return None;
4905 
4906   unsigned OffsetOpc = OffsetInst->getOpcode();
4907   if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
4908     return None;
4909 
4910   // Make sure that the memory op is a valid size.
4911   int64_t LegalShiftVal = Log2_32(SizeInBytes);
4912   if (LegalShiftVal == 0)
4913     return None;
4914   if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
4915     return None;
4916 
4917   // Now, try to find the specific G_CONSTANT. Start by assuming that the
4918   // register we will offset is the LHS, and the register containing the
4919   // constant is the RHS.
4920   Register OffsetReg = OffsetInst->getOperand(1).getReg();
4921   Register ConstantReg = OffsetInst->getOperand(2).getReg();
4922   auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
4923   if (!ValAndVReg) {
4924     // We didn't get a constant on the RHS. If the opcode is a shift, then
4925     // we're done.
4926     if (OffsetOpc == TargetOpcode::G_SHL)
4927       return None;
4928 
4929     // If we have a G_MUL, we can use either register. Try looking at the RHS.
4930     std::swap(OffsetReg, ConstantReg);
4931     ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
4932     if (!ValAndVReg)
4933       return None;
4934   }
4935 
4936   // The value must fit into 3 bits, and must be positive. Make sure that is
4937   // true.
4938   int64_t ImmVal = ValAndVReg->Value;
4939 
4940   // Since we're going to pull this into a shift, the constant value must be
4941   // a power of 2. If we got a multiply, then we need to check this.
4942   if (OffsetOpc == TargetOpcode::G_MUL) {
4943     if (!isPowerOf2_32(ImmVal))
4944       return None;
4945 
4946     // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
4947     ImmVal = Log2_32(ImmVal);
4948   }
4949 
4950   if ((ImmVal & 0x7) != ImmVal)
4951     return None;
4952 
4953   // We are only allowed to shift by LegalShiftVal. This shift value is built
4954   // into the instruction, so we can't just use whatever we want.
4955   if (ImmVal != LegalShiftVal)
4956     return None;
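  // e.g. for a 4-byte access only a shift amount of 2 (or a multiply by 4)
  // can be folded, since the addressing mode only encodes a shift equal to the
  // log2 of the access size.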
4957 
4958   unsigned SignExtend = 0;
4959   if (WantsExt) {
4960     // Check if the offset is defined by an extend.
4961     MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
4962     auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
4963     if (Ext == AArch64_AM::InvalidShiftExtend)
4964       return None;
4965 
4966     SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
4967     // We only support SXTW for signed extension here.
4968     if (SignExtend && Ext != AArch64_AM::SXTW)
4969       return None;
4970 
4971     // Need a 32-bit wide register here.
4972     MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
4973     OffsetReg = ExtInst->getOperand(1).getReg();
4974     OffsetReg = narrowExtendRegIfNeeded(OffsetReg, MIB);
4975   }
4976 
4977   // We can use the LHS of the GEP as the base, and the LHS of the shift as an
4978   // offset. Signify that we are shifting by setting the shift flag to 1.
4979   return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
4980            [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
4981            [=](MachineInstrBuilder &MIB) {
4982              // Need to add both immediates here to make sure that they are both
4983              // added to the instruction.
4984              MIB.addImm(SignExtend);
4985              MIB.addImm(1);
4986            }}};
4987 }
4988 
4989 /// This is used for computing addresses like this:
4990 ///
4991 /// ldr x1, [x2, x3, lsl #3]
4992 ///
4993 /// Where x2 is the base register, and x3 is an offset register. The shift-left
4994 /// is a constant value specific to this load instruction. That is, for an
4995 /// 8-byte element we will always see a 3 here (the log2 of the size of the
4996 /// element being loaded).
4997 InstructionSelector::ComplexRendererFns
4998 AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
4999     MachineOperand &Root, unsigned SizeInBytes) const {
5000   if (!Root.isReg())
5001     return None;
5002   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5003 
5004   // We want to find something like this:
5005   //
5006   // val = G_CONSTANT LegalShiftVal
5007   // shift = G_SHL off_reg val
5008   // ptr = G_PTR_ADD base_reg shift
5009   // x = G_LOAD ptr
5010   //
5011   // And fold it into this addressing mode:
5012   //
5013   // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
5014 
5015   // Check if we can find the G_PTR_ADD.
5016   MachineInstr *PtrAdd =
5017       getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
5018   if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
5019     return None;
5020 
5021   // Now, try to match an opcode which will match our specific offset.
5022   // We want a G_SHL or a G_MUL.
5023   MachineInstr *OffsetInst =
5024       getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
5025   return selectExtendedSHL(Root, PtrAdd->getOperand(1),
5026                            OffsetInst->getOperand(0), SizeInBytes,
5027                            /*WantsExt=*/false);
5028 }
5029 
5030 /// This is used for computing addresses like this:
5031 ///
5032 /// ldr x1, [x2, x3]
5033 ///
5034 /// Where x2 is the base register, and x3 is an offset register.
5035 ///
5036 /// When it is possible (or profitable) to fold a G_PTR_ADD into the address
5037 /// calculation, this will do so. Otherwise, it will return None.
5038 InstructionSelector::ComplexRendererFns
5039 AArch64InstructionSelector::selectAddrModeRegisterOffset(
5040     MachineOperand &Root) const {
5041   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5042 
5043   // We need a GEP.
5044   MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
5045   if (!Gep || Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
5046     return None;
5047 
5048   // If this is used more than once, let's not bother folding.
5049   // TODO: Check if they are memory ops. If they are, then we can still fold
5050   // without having to recompute anything.
5051   if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
5052     return None;
5053 
5054   // Base is the GEP's LHS, offset is its RHS.
5055   return {{[=](MachineInstrBuilder &MIB) {
5056              MIB.addUse(Gep->getOperand(1).getReg());
5057            },
5058            [=](MachineInstrBuilder &MIB) {
5059              MIB.addUse(Gep->getOperand(2).getReg());
5060            },
5061            [=](MachineInstrBuilder &MIB) {
5062              // Need to add both immediates here to make sure that they are both
5063              // added to the instruction.
5064              MIB.addImm(0);
5065              MIB.addImm(0);
5066            }}};
5067 }
5068 
5069 /// This is intended to be equivalent to selectAddrModeXRO in
5070 /// AArch64ISelDAGToDAG. It's used for selecting X register offset loads.
5071 InstructionSelector::ComplexRendererFns
5072 AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
5073                                               unsigned SizeInBytes) const {
5074   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5075 
5076   // If we have a constant offset, then we probably don't want to match a
5077   // register offset.
5078   if (isBaseWithConstantOffset(Root, MRI))
5079     return None;
5080 
5081   // Try to fold shifts into the addressing mode.
5082   auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
5083   if (AddrModeFns)
5084     return AddrModeFns;
5085 
5086   // If that doesn't work, see if it's possible to fold in registers from
5087   // a GEP.
5088   return selectAddrModeRegisterOffset(Root);
5089 }
5090 
5091 /// This is used for computing addresses like this:
5092 ///
5093 /// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
5094 ///
5095 /// Where we have a 64-bit base register, a 32-bit offset register, and an
5096 /// extend (which may or may not be signed).
5097 InstructionSelector::ComplexRendererFns
5098 AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
5099                                               unsigned SizeInBytes) const {
5100   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5101 
5102   MachineInstr *PtrAdd =
5103       getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
5104   if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
5105     return None;
5106 
5107   MachineOperand &LHS = PtrAdd->getOperand(1);
5108   MachineOperand &RHS = PtrAdd->getOperand(2);
5109   MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
5110 
5111   // The first case is the same as selectAddrModeXRO, except we need an extend.
5112   // In this case, we try to find a shift and extend, and fold them into the
5113   // addressing mode.
5114   //
5115   // E.g.
5116   //
5117   // off_reg = G_Z/S/ANYEXT ext_reg
5118   // val = G_CONSTANT LegalShiftVal
5119   // shift = G_SHL off_reg val
5120   // ptr = G_PTR_ADD base_reg shift
5121   // x = G_LOAD ptr
5122   //
5123   // In this case we can get a load like this:
5124   //
5125   // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
5126   auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
5127                                        SizeInBytes, /*WantsExt=*/true);
5128   if (ExtendedShl)
5129     return ExtendedShl;
5130 
5131   // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
5132   //
5133   // e.g.
5134   // ldr something, [base_reg, ext_reg, sxtw]
5135   if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
5136     return None;
5137 
5138   // Check if this is an extend. We'll get an extend type if it is.
5139   AArch64_AM::ShiftExtendType Ext =
5140       getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
5141   if (Ext == AArch64_AM::InvalidShiftExtend)
5142     return None;
5143 
5144   // Need a 32-bit wide register.
5145   MachineIRBuilder MIB(*PtrAdd);
5146   Register ExtReg =
5147       narrowExtendRegIfNeeded(OffsetInst->getOperand(1).getReg(), MIB);
5148   unsigned SignExtend = Ext == AArch64_AM::SXTW;
5149 
5150   // Base is LHS, offset is ExtReg.
5151   return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
5152            [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
5153            [=](MachineInstrBuilder &MIB) {
5154              MIB.addImm(SignExtend);
5155              MIB.addImm(0);
5156            }}};
5157 }
5158 
5159 /// Select a "register plus unscaled signed 9-bit immediate" address.  This
5160 /// should only match when there is an offset that is not valid for a scaled
5161 /// immediate addressing mode.  The "Size" argument is the size in bytes of the
5162 /// memory reference, which is needed here to know what is valid for a scaled
5163 /// immediate.
5164 InstructionSelector::ComplexRendererFns
5165 AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
5166                                                    unsigned Size) const {
5167   MachineRegisterInfo &MRI =
5168       Root.getParent()->getParent()->getParent()->getRegInfo();
5169 
5170   if (!Root.isReg())
5171     return None;
5172 
5173   if (!isBaseWithConstantOffset(Root, MRI))
5174     return None;
5175 
5176   MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
5177   if (!RootDef)
5178     return None;
5179 
5180   MachineOperand &OffImm = RootDef->getOperand(2);
5181   if (!OffImm.isReg())
5182     return None;
5183   MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
5184   if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
5185     return None;
5186   int64_t RHSC;
5187   MachineOperand &RHSOp1 = RHS->getOperand(1);
5188   if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
5189     return None;
5190   RHSC = RHSOp1.getCImm()->getSExtValue();
5191 
5192   // If the offset is valid as a scaled immediate, don't match here.
5193   if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
5194     return None;
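  // e.g. for a 4-byte access, an offset of 3 is not a multiple of the size and
  // so is handled here with the unscaled (LDUR/STUR-style) form, while an
  // offset of 8 is left to selectAddrModeIndexed.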
5195   if (RHSC >= -256 && RHSC < 256) {
5196     MachineOperand &Base = RootDef->getOperand(1);
5197     return {{
5198         [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
5199         [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
5200     }};
5201   }
5202   return None;
5203 }
5204 
5205 InstructionSelector::ComplexRendererFns
5206 AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
5207                                                  unsigned Size,
5208                                                  MachineRegisterInfo &MRI) const {
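  // We're looking at the ADRP + G_ADD_LOW pair emitted for small-code-model
  // globals, so the :lo12: part can be folded straight into the load/store's
  // scaled immediate field, e.g.:
  //   adrp x0, symbol
  //   ldr  w1, [x0, :lo12:symbol]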
5209   if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
5210     return None;
5211   MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
5212   if (Adrp.getOpcode() != AArch64::ADRP)
5213     return None;
5214 
5215   // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
5216   // TODO: Need to check GV's offset % size if doing offset folding into globals.
5217   assert(Adrp.getOperand(1).getOffset() == 0 && "Unexpected offset in global");
5218   auto GV = Adrp.getOperand(1).getGlobal();
5219   if (GV->isThreadLocal())
5220     return None;
5221 
5222   auto &MF = *RootDef.getParent()->getParent();
5223   if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
5224     return None;
5225 
5226   unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
5227   MachineIRBuilder MIRBuilder(RootDef);
5228   Register AdrpReg = Adrp.getOperand(0).getReg();
5229   return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
5230            [=](MachineInstrBuilder &MIB) {
5231              MIB.addGlobalAddress(GV, /* Offset */ 0,
5232                                   OpFlags | AArch64II::MO_PAGEOFF |
5233                                       AArch64II::MO_NC);
5234            }}};
5235 }
5236 
5237 /// Select a "register plus scaled unsigned 12-bit immediate" address.  The
5238 /// "Size" argument is the size in bytes of the memory reference, which
5239 /// determines the scale.
5240 InstructionSelector::ComplexRendererFns
5241 AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
5242                                                   unsigned Size) const {
5243   MachineFunction &MF = *Root.getParent()->getParent()->getParent();
5244   MachineRegisterInfo &MRI = MF.getRegInfo();
5245 
5246   if (!Root.isReg())
5247     return None;
5248 
5249   MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
5250   if (!RootDef)
5251     return None;
5252 
5253   if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
5254     return {{
5255         [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
5256         [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
5257     }};
5258   }
5259 
5260   CodeModel::Model CM = MF.getTarget().getCodeModel();
5261   // Check if we can fold in the ADD of small code model ADRP + ADD address.
5262   if (CM == CodeModel::Small) {
5263     auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
5264     if (OpFns)
5265       return OpFns;
5266   }
5267 
5268   if (isBaseWithConstantOffset(Root, MRI)) {
5269     MachineOperand &LHS = RootDef->getOperand(1);
5270     MachineOperand &RHS = RootDef->getOperand(2);
5271     MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
5272     MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
5273     if (LHSDef && RHSDef) {
5274       int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
5275       unsigned Scale = Log2_32(Size);
5276       if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
5277         if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
5278           return {{
5279               [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
5280               [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
5281           }};
5282 
5283         return {{
5284             [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
5285             [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
5286         }};
5287       }
5288     }
5289   }
5290 
5291   // Before falling back to our general case, check if the unscaled
5292   // addressing mode can handle this; if so, return None to defer to it.
5293   if (selectAddrModeUnscaled(Root, Size).hasValue())
5294     return None;
5295 
5296   return {{
5297       [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
5298       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
5299   }};
5300 }
5301 
5302 /// Given a shift instruction, return the correct shift type for that
5303 /// instruction.
5304 static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
5305   // TODO: Handle AArch64_AM::ROR
5306   switch (MI.getOpcode()) {
5307   default:
5308     return AArch64_AM::InvalidShiftExtend;
5309   case TargetOpcode::G_SHL:
5310     return AArch64_AM::LSL;
5311   case TargetOpcode::G_LSHR:
5312     return AArch64_AM::LSR;
5313   case TargetOpcode::G_ASHR:
5314     return AArch64_AM::ASR;
5315   }
5316 }
5317 
5318 /// Select a "shifted register" operand. If the value is not shifted, set the
5319 /// shift operand to a default value of "lsl 0".
5320 ///
5321 /// TODO: Allow shifted register to be rotated in logical instructions.
5322 InstructionSelector::ComplexRendererFns
5323 AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const {
5324   if (!Root.isReg())
5325     return None;
5326   MachineRegisterInfo &MRI =
5327       Root.getParent()->getParent()->getParent()->getRegInfo();
5328 
5329   // Check if the operand is defined by an instruction which corresponds to
5330   // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
5331   //
5332   // TODO: Handle AArch64_AM::ROR for logical instructions.
5333   MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
5334   if (!ShiftInst)
5335     return None;
5336   AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
5337   if (ShType == AArch64_AM::InvalidShiftExtend)
5338     return None;
5339   if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
5340     return None;
5341 
5342   // Need an immediate on the RHS.
5343   MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
5344   auto Immed = getImmedFromMO(ShiftRHS);
5345   if (!Immed)
5346     return None;
5347 
5348   // We have something that we can fold. Fold in the shift's LHS and RHS into
5349   // the instruction.
5350   MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
5351   Register ShiftReg = ShiftLHS.getReg();
5352 
5353   unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
5354   unsigned Val = *Immed & (NumBits - 1);
5355   unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);
5356 
5357   return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
5358            [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
5359 }
5360 
5361 AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
5362     MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
5363   unsigned Opc = MI.getOpcode();
5364 
5365   // Handle explicit extend instructions first.
5366   if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
5367     unsigned Size;
5368     if (Opc == TargetOpcode::G_SEXT)
5369       Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
5370     else
5371       Size = MI.getOperand(2).getImm();
5372     assert(Size != 64 && "Extend from 64 bits?");
5373     switch (Size) {
5374     case 8:
5375       return AArch64_AM::SXTB;
5376     case 16:
5377       return AArch64_AM::SXTH;
5378     case 32:
5379       return AArch64_AM::SXTW;
5380     default:
5381       return AArch64_AM::InvalidShiftExtend;
5382     }
5383   }
5384 
5385   if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
5386     unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
5387     assert(Size != 64 && "Extend from 64 bits?");
5388     switch (Size) {
5389     case 8:
5390       return AArch64_AM::UXTB;
5391     case 16:
5392       return AArch64_AM::UXTH;
5393     case 32:
5394       return AArch64_AM::UXTW;
5395     default:
5396       return AArch64_AM::InvalidShiftExtend;
5397     }
5398   }
5399 
5400   // Don't have an explicit extend. Try to handle a G_AND with a constant mask
5401   // on the RHS.
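  // For example (added): %v:gpr(s64) = G_AND %x, %mask with
  // %mask = G_CONSTANT i64 255 acts as a zero-extend from 8 bits and maps to
  // UXTB below; for load/store addressing only the 32-bit mask (UXTW) is
  // accepted.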
5402   if (Opc != TargetOpcode::G_AND)
5403     return AArch64_AM::InvalidShiftExtend;
5404 
5405   Optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
5406   if (!MaybeAndMask)
5407     return AArch64_AM::InvalidShiftExtend;
5408   uint64_t AndMask = *MaybeAndMask;
5409   switch (AndMask) {
5410   default:
5411     return AArch64_AM::InvalidShiftExtend;
5412   case 0xFF:
5413     return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
5414   case 0xFFFF:
5415     return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
5416   case 0xFFFFFFFF:
5417     return AArch64_AM::UXTW;
5418   }
5419 }
5420 
5421 Register AArch64InstructionSelector::narrowExtendRegIfNeeded(
5422     Register ExtReg, MachineIRBuilder &MIB) const {
5423   MachineRegisterInfo &MRI = *MIB.getMRI();
5424   if (MRI.getType(ExtReg).getSizeInBits() == 32)
5425     return ExtReg;
5426 
5427   // Insert a copy to move ExtReg to GPR32.
5428   Register NarrowReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
5429   auto Copy = MIB.buildCopy({NarrowReg}, {ExtReg});
5430 
5431   // Select the copy into a subregister copy.
5432   selectCopy(*Copy, TII, MRI, TRI, RBI);
5433   return Copy.getReg(0);
5434 }
5435 
5436 Register AArch64InstructionSelector::widenGPRBankRegIfNeeded(
5437     Register Reg, unsigned WideSize, MachineIRBuilder &MIB) const {
5438   assert(WideSize >= 8 && "WideSize is smaller than all possible registers?");
5439   MachineRegisterInfo &MRI = *MIB.getMRI();
5440   unsigned NarrowSize = MRI.getType(Reg).getSizeInBits();
5441   assert(WideSize >= NarrowSize &&
5442          "WideSize cannot be smaller than NarrowSize!");
5443 
5444   // If the sizes match, just return the register.
5445   //
5446   // If NarrowSize is an s1, then we can select it to any size, so we'll treat
5447   // it as a don't care.
5448   if (NarrowSize == WideSize || NarrowSize == 1)
5449     return Reg;
5450 
5451   // Now check the register classes.
5452   const RegisterBank *RB = RBI.getRegBank(Reg, MRI, TRI);
5453   const TargetRegisterClass *OrigRC = getMinClassForRegBank(*RB, NarrowSize);
5454   const TargetRegisterClass *WideRC = getMinClassForRegBank(*RB, WideSize);
5455   assert(OrigRC && "Could not determine narrow RC?");
5456   assert(WideRC && "Could not determine wide RC?");
5457 
5458   // If the sizes differ, but the register classes are the same, there is no
5459   // need to insert a SUBREG_TO_REG.
5460   //
5461   // For example, an s8 that's supposed to be a GPR will be selected to either
5462   // a GPR32 or a GPR64 register. Note that this assumes that the s8 will
5463   // always end up on a GPR32.
5464   if (OrigRC == WideRC)
5465     return Reg;
5466 
5467   // We have two different register classes. Insert a SUBREG_TO_REG.
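  // Illustrative example (added): an s16 value living in a GPR32 is widened
  // to a 64-bit class with something like
  //   %wide:gpr64 = SUBREG_TO_REG 0, %narrow:gpr32, %subreg.sub_32
  // i.e. the 32-bit value becomes the low half of a 64-bit register.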
5468   unsigned SubReg = 0;
5469   getSubRegForClass(OrigRC, TRI, SubReg);
5470   assert(SubReg && "Couldn't determine subregister?");
5471 
5472   // Build the SUBREG_TO_REG and return the new, widened register.
5473   auto SubRegToReg =
5474       MIB.buildInstr(AArch64::SUBREG_TO_REG, {WideRC}, {})
5475           .addImm(0)
5476           .addUse(Reg)
5477           .addImm(SubReg);
5478   constrainSelectedInstRegOperands(*SubRegToReg, TII, TRI, RBI);
5479   return SubRegToReg.getReg(0);
5480 }
5481 
5482 /// Select an "extended register" operand. This operand folds in an extend
5483 /// followed by an optional left shift.
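///
/// Illustrative example (added; vreg names are invented): a pattern such as
///   %ext:gpr(s64) = G_SEXT %w:gpr(s32)
///   %amt:gpr(s64) = G_CONSTANT i64 2
///   %shl:gpr(s64) = G_SHL %ext, %amt
/// feeding an add can be selected to something like
///   add x0, x1, w2, sxtw #2
/// so both the extend and the left shift are folded into the consumer.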
5484 InstructionSelector::ComplexRendererFns
5485 AArch64InstructionSelector::selectArithExtendedRegister(
5486     MachineOperand &Root) const {
5487   if (!Root.isReg())
5488     return None;
5489   MachineRegisterInfo &MRI =
5490       Root.getParent()->getParent()->getParent()->getRegInfo();
5491 
5492   uint64_t ShiftVal = 0;
5493   Register ExtReg;
5494   AArch64_AM::ShiftExtendType Ext;
5495   MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
5496   if (!RootDef)
5497     return None;
5498 
5499   if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI))
5500     return None;
5501 
5502   // Check if we can fold a shift and an extend.
5503   if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
5504     // Look for a constant on the RHS of the shift.
5505     MachineOperand &RHS = RootDef->getOperand(2);
5506     Optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
5507     if (!MaybeShiftVal)
5508       return None;
5509     ShiftVal = *MaybeShiftVal;
5510     if (ShiftVal > 4)
5511       return None;
5512     // Look for a valid extend instruction on the LHS of the shift.
5513     MachineOperand &LHS = RootDef->getOperand(1);
5514     MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
5515     if (!ExtDef)
5516       return None;
5517     Ext = getExtendTypeForInst(*ExtDef, MRI);
5518     if (Ext == AArch64_AM::InvalidShiftExtend)
5519       return None;
5520     ExtReg = ExtDef->getOperand(1).getReg();
5521   } else {
5522     // Didn't get a shift. Try just folding an extend.
5523     Ext = getExtendTypeForInst(*RootDef, MRI);
5524     if (Ext == AArch64_AM::InvalidShiftExtend)
5525       return None;
5526     ExtReg = RootDef->getOperand(1).getReg();
5527 
5528     // If we have a 32-bit instruction which zeroes out the high half of a
5529     // register, we get an implicit zero extend for free. Check if we have one.
5530     // FIXME: We actually emit the extend right now even though we don't have
5531     // to.
5532     if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
5533       MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
5534       if (ExtInst && isDef32(*ExtInst))
5535         return None;
5536     }
5537   }
5538 
5539   // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
5540   // copy.
5541   MachineIRBuilder MIB(*RootDef);
5542   ExtReg = narrowExtendRegIfNeeded(ExtReg, MIB);
5543 
5544   return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
5545            [=](MachineInstrBuilder &MIB) {
5546              MIB.addImm(getArithExtendImm(Ext, ShiftVal));
5547            }}};
5548 }
5549 
5550 void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
5551                                                 const MachineInstr &MI,
5552                                                 int OpIdx) const {
5553   const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
5554   assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
5555          "Expected G_CONSTANT");
5556   Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
5557   assert(CstVal && "Expected constant value");
5558   MIB.addImm(CstVal.getValue());
5559 }
5560 
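// Note (added): the two renderers below re-encode a G_CONSTANT's value into
// the packed (N, immr, imms) form expected by logical-immediate instructions
// such as ANDWri/ORRXri. For example, 0x00FF00FF is a valid 32-bit logical
// immediate (a repeating 16-bit pattern) and is emitted via
// AArch64_AM::encodeLogicalImmediate rather than as the raw constant.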
5561 void AArch64InstructionSelector::renderLogicalImm32(
5562   MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
5563   assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
5564          "Expected G_CONSTANT");
5565   uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
5566   uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
5567   MIB.addImm(Enc);
5568 }
5569 
5570 void AArch64InstructionSelector::renderLogicalImm64(
5571   MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
5572   assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
5573          "Expected G_CONSTANT");
5574   uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
5575   uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
5576   MIB.addImm(Enc);
5577 }
5578 
5579 bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
5580     const MachineInstr &MI, unsigned NumBytes) const {
5581   if (!MI.mayLoadOrStore())
5582     return false;
5583   assert(MI.hasOneMemOperand() &&
5584          "Expected load/store to have only one mem op!");
5585   return (*MI.memoperands_begin())->getSize() == NumBytes;
5586 }
5587 
5588 bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
5589   const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
5590   if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
5591     return false;
5592 
5593   // Only return true if we know the operation will zero-out the high half of
5594   // the 64-bit register. Truncates can be subregister copies, which don't
5595   // zero out the high bits. Copies and other copy-like instructions can be
5596   // fed by truncates, or could be lowered as subregister copies.
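  // For example (added): a 32-bit def such as ADDWrr clears bits [63:32] of
  // the underlying X register, so it provides a free implicit zero-extend,
  // whereas a G_TRUNC may become a plain subregister copy that leaves the
  // high bits unchanged.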
5597   switch (MI.getOpcode()) {
5598   default:
5599     return true;
5600   case TargetOpcode::COPY:
5601   case TargetOpcode::G_BITCAST:
5602   case TargetOpcode::G_TRUNC:
5603   case TargetOpcode::G_PHI:
5604     return false;
5605   }
5606 }
5607 
5608 
5609 // Perform fixups on the given PHI instruction's operands to force them all
5610 // to be the same as the destination regbank.
5611 static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
5612                             const AArch64RegisterBankInfo &RBI) {
5613   assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
5614   Register DstReg = MI.getOperand(0).getReg();
5615   const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
5616   assert(DstRB && "Expected PHI dst to have regbank assigned");
5617   MachineIRBuilder MIB(MI);
5618 
5619   // Go through each operand and ensure it has the same regbank.
5620   for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
5621     MachineOperand &MO = MI.getOperand(OpIdx);
5622     if (!MO.isReg())
5623       continue;
5624     Register OpReg = MO.getReg();
5625     const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
5626     if (RB != DstRB) {
5627       // Insert a cross-bank copy.
5628       auto *OpDef = MRI.getVRegDef(OpReg);
5629       const LLT &Ty = MRI.getType(OpReg);
5630       MIB.setInsertPt(*OpDef->getParent(), std::next(OpDef->getIterator()));
5631       auto Copy = MIB.buildCopy(Ty, OpReg);
5632       MRI.setRegBank(Copy.getReg(0), *DstRB);
5633       MO.setReg(Copy.getReg(0));
5634     }
5635   }
5636 }
5637 
5638 void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
5639   // We're looking for PHIs, build a list so we don't invalidate iterators.
5640   MachineRegisterInfo &MRI = MF.getRegInfo();
5641   SmallVector<MachineInstr *, 32> Phis;
5642   for (auto &BB : MF) {
5643     for (auto &MI : BB) {
5644       if (MI.getOpcode() == TargetOpcode::G_PHI)
5645         Phis.emplace_back(&MI);
5646     }
5647   }
5648 
5649   for (auto *MI : Phis) {
5650     // We need to do some work here if the operand types are < 32 bit and they
5651     // are split across fpr/gpr banks. Since all types <32b on gpr
5652     // end up being assigned gpr32 regclasses, we can end up with PHIs here
5653     // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
5654     // be selecting heterogeneous regbanks for operands if possible, but we
5655     // still need to be able to deal with it here.
5656     //
5657     // To fix this, if we have a gpr-bank operand < 32b in size and at least
5658     // one other operand is on the fpr bank, then we add cross-bank copies
5659     // to homogenize the operand banks. For simplicity the bank that we choose
5660     // to settle on is whatever bank the def operand has. For example:
5661     //
5662     // %endbb:
5663     //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
5664     //  =>
5665     // %bb2:
5666     //   ...
5667     //   %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
5668     //   ...
5669     // %endbb:
5670     //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
5671     bool HasGPROp = false, HasFPROp = false;
5672     for (unsigned OpIdx = 1; OpIdx < MI->getNumOperands(); ++OpIdx) {
5673       const auto &MO = MI->getOperand(OpIdx);
5674       if (!MO.isReg())
5675         continue;
5676       const LLT &Ty = MRI.getType(MO.getReg());
5677       if (!Ty.isValid() || !Ty.isScalar())
5678         break;
5679       if (Ty.getSizeInBits() >= 32)
5680         break;
5681       const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
5682       // If for some reason we don't have a regbank yet, don't try anything.
5683       if (!RB)
5684         break;
5685 
5686       if (RB->getID() == AArch64::GPRRegBankID)
5687         HasGPROp = true;
5688       else
5689         HasFPROp = true;
5690     }
5691     // We have heterogeneous regbanks; we need to fix them up.
5692     if (HasGPROp && HasFPROp)
5693       fixupPHIOpBanks(*MI, MRI, RBI);
5694   }
5695 }
5696 
5697 namespace llvm {
5698 InstructionSelector *
5699 createAArch64InstructionSelector(const AArch64TargetMachine &TM,
5700                                  AArch64Subtarget &Subtarget,
5701                                  AArch64RegisterBankInfo &RBI) {
5702   return new AArch64InstructionSelector(TM, Subtarget, RBI);
5703 }
5704 }
5705