1 //===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file implements the targeting of the InstructionSelector class for
10 /// AArch64.
11 /// \todo This should be generated by TableGen.
12 //===----------------------------------------------------------------------===//
13 
14 #include "AArch64InstrInfo.h"
15 #include "AArch64MachineFunctionInfo.h"
16 #include "AArch64RegisterBankInfo.h"
17 #include "AArch64RegisterInfo.h"
18 #include "AArch64Subtarget.h"
19 #include "AArch64TargetMachine.h"
20 #include "MCTargetDesc/AArch64AddressingModes.h"
21 #include "MCTargetDesc/AArch64MCTargetDesc.h"
22 #include "llvm/ADT/Optional.h"
23 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
24 #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
25 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
26 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
27 #include "llvm/CodeGen/GlobalISel/Utils.h"
28 #include "llvm/CodeGen/MachineBasicBlock.h"
29 #include "llvm/CodeGen/MachineConstantPool.h"
30 #include "llvm/CodeGen/MachineFunction.h"
31 #include "llvm/CodeGen/MachineInstr.h"
32 #include "llvm/CodeGen/MachineInstrBuilder.h"
33 #include "llvm/CodeGen/MachineOperand.h"
34 #include "llvm/CodeGen/MachineRegisterInfo.h"
35 #include "llvm/CodeGen/TargetOpcodes.h"
36 #include "llvm/IR/Constants.h"
37 #include "llvm/IR/Instructions.h"
38 #include "llvm/IR/PatternMatch.h"
39 #include "llvm/IR/Type.h"
40 #include "llvm/IR/IntrinsicsAArch64.h"
41 #include "llvm/Pass.h"
42 #include "llvm/Support/Debug.h"
43 #include "llvm/Support/raw_ostream.h"
44 
45 #define DEBUG_TYPE "aarch64-isel"
46 
47 using namespace llvm;
48 using namespace MIPatternMatch;
49 
50 namespace {
51 
52 #define GET_GLOBALISEL_PREDICATE_BITSET
53 #include "AArch64GenGlobalISel.inc"
54 #undef GET_GLOBALISEL_PREDICATE_BITSET
55 
56 class AArch64InstructionSelector : public InstructionSelector {
57 public:
58   AArch64InstructionSelector(const AArch64TargetMachine &TM,
59                              const AArch64Subtarget &STI,
60                              const AArch64RegisterBankInfo &RBI);
61 
62   bool select(MachineInstr &I) override;
63   static const char *getName() { return DEBUG_TYPE; }
64 
65   void setupMF(MachineFunction &MF, GISelKnownBits &KB,
66                CodeGenCoverage &CoverageInfo) override {
67     InstructionSelector::setupMF(MF, KB, CoverageInfo);
68 
69     // hasFnAttribute() is expensive to call on every BRCOND selection, so
70     // cache it here for each run of the selector.
71     ProduceNonFlagSettingCondBr =
72         !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
73     MFReturnAddr = Register();
74 
75     processPHIs(MF);
76   }
77 
78 private:
79   /// tblgen-erated 'select' implementation, used as the initial selector for
80   /// the patterns that don't require complex C++.
81   bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
82 
83   // A lowering phase that runs before any selection attempts.
84   // Returns true if the instruction was modified.
85   bool preISelLower(MachineInstr &I);
86 
87   // An early selection function that runs before the selectImpl() call.
88   bool earlySelect(MachineInstr &I) const;
89 
90   // Do some preprocessing of G_PHIs before we begin selection.
91   void processPHIs(MachineFunction &MF);
92 
93   bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
94 
95   /// Eliminate same-sized cross-bank copies into stores before selectImpl().
96   bool contractCrossBankCopyIntoStore(MachineInstr &I,
97                                       MachineRegisterInfo &MRI);
98 
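  // Convert a G_PTR_ADD into an equivalent G_ADD before selection, so that
  // the imported integer patterns have a chance to apply. Returns true if
  // the instruction was changed.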
99   bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
100 
101   bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
102                           MachineRegisterInfo &MRI) const;
103   bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
104                            MachineRegisterInfo &MRI) const;
105 
106   ///@{
107   /// Helper functions for selectCompareBranch.
108   bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
109                                     MachineIRBuilder &MIB) const;
110   bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
111                                     MachineIRBuilder &MIB) const;
112   bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
113                                     MachineIRBuilder &MIB) const;
114   bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
115                                   MachineBasicBlock *DstMBB,
116                                   MachineIRBuilder &MIB) const;
117   ///@}
118 
119   bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
120                            MachineRegisterInfo &MRI) const;
121 
122   bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI) const;
123   bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
124 
  // Helper to generate an equivalent of scalar_to_vector into a new register
  // of class DstRC; the instruction defining that register is returned.
127   MachineInstr *emitScalarToVector(unsigned EltSize,
128                                    const TargetRegisterClass *DstRC,
129                                    Register Scalar,
130                                    MachineIRBuilder &MIRBuilder) const;
131 
132   /// Emit a lane insert into \p DstReg, or a new vector register if None is
133   /// provided.
134   ///
135   /// The lane inserted into is defined by \p LaneIdx. The vector source
136   /// register is given by \p SrcReg. The register containing the element is
137   /// given by \p EltReg.
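  ///
  /// For example (illustrative only; the exact opcode depends on the element
  /// size), inserting a 32-bit element into lane 1 ends up as something like:
  /// \code
  ///   %dst:fpr128 = INSvi32lane %SrcReg, 1, %elt_as_vector, 0
  /// \endcode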
138   MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
139                                Register EltReg, unsigned LaneIdx,
140                                const RegisterBank &RB,
141                                MachineIRBuilder &MIRBuilder) const;
142   bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
143   bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
144                               MachineRegisterInfo &MRI) const;
145   bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
146   bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
147   bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
148 
149   bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
150   bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
151   bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
152   bool selectSplitVectorUnmerge(MachineInstr &I,
153                                 MachineRegisterInfo &MRI) const;
154   bool selectIntrinsicWithSideEffects(MachineInstr &I,
155                                       MachineRegisterInfo &MRI) const;
156   bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
157   bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
158   bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
159   bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
160   bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
161   bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
162   bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const;
163   bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI) const;
164 
165   unsigned emitConstantPoolEntry(const Constant *CPVal,
166                                  MachineFunction &MF) const;
167   MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
168                                          MachineIRBuilder &MIRBuilder) const;
169 
170   // Emit a vector concat operation.
171   MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
172                                  Register Op2,
173                                  MachineIRBuilder &MIRBuilder) const;
174 
  // Emit an integer compare between LHS and RHS for the given Predicate.
176   MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
177                                    MachineOperand &Predicate,
178                                    MachineIRBuilder &MIRBuilder) const;
179 
180   /// Emit a floating point comparison between \p LHS and \p RHS.
  /// \p Pred, if given, is the predicate to use.
182   MachineInstr *emitFPCompare(Register LHS, Register RHS,
183                               MachineIRBuilder &MIRBuilder,
184                               Optional<CmpInst::Predicate> = None) const;
185 
186   MachineInstr *emitInstr(unsigned Opcode,
187                           std::initializer_list<llvm::DstOp> DstOps,
188                           std::initializer_list<llvm::SrcOp> SrcOps,
189                           MachineIRBuilder &MIRBuilder,
190                           const ComplexRendererFns &RenderFns = None) const;
191   /// Helper function to emit an add or sub instruction.
192   ///
  /// \p AddrModeAndSizeToOpcode must contain an opcode for each addressing
  /// mode and register size, laid out in the specific order described below.
195   ///
196   /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
197   ///
198   /// \code
  ///   const std::array<std::array<unsigned, 2>, 5> Table {
200   ///    {{AArch64::ADDXri, AArch64::ADDWri},
201   ///     {AArch64::ADDXrs, AArch64::ADDWrs},
202   ///     {AArch64::ADDXrr, AArch64::ADDWrr},
203   ///     {AArch64::SUBXri, AArch64::SUBWri},
204   ///     {AArch64::ADDXrx, AArch64::ADDWrx}}};
205   /// \endcode
206   ///
207   /// Each row in the table corresponds to a different addressing mode. Each
208   /// column corresponds to a different register size.
209   ///
210   /// \attention Rows must be structured as follows:
211   ///   - Row 0: The ri opcode variants
212   ///   - Row 1: The rs opcode variants
213   ///   - Row 2: The rr opcode variants
214   ///   - Row 3: The ri opcode variants for negative immediates
215   ///   - Row 4: The rx opcode variants
216   ///
217   /// \attention Columns must be structured as follows:
218   ///   - Column 0: The 64-bit opcode variants
219   ///   - Column 1: The 32-bit opcode variants
220   ///
221   /// \p Dst is the destination register of the binop to emit.
222   /// \p LHS is the left-hand operand of the binop to emit.
223   /// \p RHS is the right-hand operand of the binop to emit.
224   MachineInstr *emitAddSub(
225       const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
226       Register Dst, MachineOperand &LHS, MachineOperand &RHS,
227       MachineIRBuilder &MIRBuilder) const;
228   MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
229                         MachineOperand &RHS,
230                         MachineIRBuilder &MIRBuilder) const;
231   MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
232                          MachineIRBuilder &MIRBuilder) const;
233   MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
234                          MachineIRBuilder &MIRBuilder) const;
235   MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
236                         MachineIRBuilder &MIRBuilder) const;
237   MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
238                         MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitSelect(Register Dst, Register True, Register False,
240                            AArch64CC::CondCode CC,
241                            MachineIRBuilder &MIRBuilder) const;
242   MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
243                                      const RegisterBank &DstRB, LLT ScalarTy,
244                                      Register VecReg, unsigned LaneIdx,
245                                      MachineIRBuilder &MIRBuilder) const;
246 
  /// Helper function for selecting G_FCONSTANT. If the G_FCONSTANT can be
  /// materialized using an FMOV instruction, then update MI and return it.
  /// Otherwise, do nothing and return nullptr.
250   MachineInstr *emitFMovForFConstant(MachineInstr &MI,
251                                      MachineRegisterInfo &MRI) const;
252 
253   /// Emit a CSet for an integer compare.
254   ///
255   /// \p DefReg is expected to be a 32-bit scalar register.
256   MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
257                                 MachineIRBuilder &MIRBuilder) const;
258   /// Emit a CSet for a FP compare.
259   ///
260   /// \p Dst is expected to be a 32-bit scalar register.
261   MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
262                                 MachineIRBuilder &MIRBuilder) const;
263 
264   /// Emit the overflow op for \p Opcode.
265   ///
266   /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
267   /// G_USUBO, etc.
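  ///
  /// For example (illustrative), a 32-bit G_UADDO is expected to map to an
  /// ADDSWrr plus the HS condition code (carry set), which the caller can
  /// then consume with a CSINC/CSET or a conditional branch.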
268   std::pair<MachineInstr *, AArch64CC::CondCode>
269   emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
270                  MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
271 
272   /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
273   /// \p IsNegative is true if the test should be "not zero".
274   /// This will also optimize the test bit instruction when possible.
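  ///
  /// For example (illustrative assembly):
  /// \code
  ///   tbnz w0, #3, .LBB0_2   ; IsNegative == true: branch if bit 3 is set
  /// \endcode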
275   MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
276                             MachineBasicBlock *DstMBB,
277                             MachineIRBuilder &MIB) const;
278 
279   /// Emit a CB(N)Z instruction which branches to \p DestMBB.
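  ///
  /// For example (illustrative assembly):
  /// \code
  ///   cbz  x0, .LBB0_2   ; IsNegative == false: branch if x0 is zero
  ///   cbnz x0, .LBB0_2   ; IsNegative == true:  branch if x0 is not zero
  /// \endcode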
280   MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
281                         MachineBasicBlock *DestMBB,
282                         MachineIRBuilder &MIB) const;
283 
284   // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
285   // We use these manually instead of using the importer since it doesn't
286   // support SDNodeXForm.
287   ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
288   ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
289   ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
290   ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
291 
292   ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
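  // An AArch64 arithmetic immediate is a 12-bit unsigned value, optionally
  // shifted left by 12. For example (illustrative):
  //   add x0, x1, #0x123           ; value = 0x123, shift = 0
  //   add x0, x1, #0x123, lsl #12  ; value = 0x123, shift = 12
  // selectNegArithImmed handles the negated form, e.g. an add of -C selected
  // as a sub of C, when the negated constant fits this encoding.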
293   ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
294   ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
295 
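  // "Unscaled" refers to the LDUR/STUR family, which takes a signed 9-bit
  // byte offset, e.g. (illustrative):
  //   ldur x0, [x1, #-8]
  // as opposed to the scaled unsigned offsets handled by
  // selectAddrModeIndexed() below.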
296   ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
297                                             unsigned Size) const;
298 
299   ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
300     return selectAddrModeUnscaled(Root, 1);
301   }
302   ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
303     return selectAddrModeUnscaled(Root, 2);
304   }
305   ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
306     return selectAddrModeUnscaled(Root, 4);
307   }
308   ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
309     return selectAddrModeUnscaled(Root, 8);
310   }
311   ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
312     return selectAddrModeUnscaled(Root, 16);
313   }
314 
  /// Helper to try to fold a GISEL_ADD_LOW into an immediate, to be used
  /// from complex pattern matchers like selectAddrModeIndexed().
317   ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
318                                           MachineRegisterInfo &MRI) const;
319 
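  // "Indexed" here is the base + scaled unsigned 12-bit immediate mode, where
  // the encoded offset is scaled by the access size, e.g. (illustrative):
  //   ldr x0, [x1, #32]   ; encoded offset = 32 / 8 = 4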
320   ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
321                                            unsigned Size) const;
322   template <int Width>
323   ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
324     return selectAddrModeIndexed(Root, Width / 8);
325   }
326 
327   bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
328                                      const MachineRegisterInfo &MRI) const;
329   ComplexRendererFns
330   selectAddrModeShiftedExtendXReg(MachineOperand &Root,
331                                   unsigned SizeInBytes) const;
332 
333   /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
334   /// or not a shift + extend should be folded into an addressing mode. Returns
335   /// None when this is not profitable or possible.
336   ComplexRendererFns
337   selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
338                     MachineOperand &Offset, unsigned SizeInBytes,
339                     bool WantsExt) const;
340   ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
341   ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
342                                        unsigned SizeInBytes) const;
343   template <int Width>
344   ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
345     return selectAddrModeXRO(Root, Width / 8);
346   }
347 
348   ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
349                                        unsigned SizeInBytes) const;
350   template <int Width>
351   ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
352     return selectAddrModeWRO(Root, Width / 8);
353   }
354 
355   ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const;
356 
357   ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
358     return selectShiftedRegister(Root);
359   }
360 
361   ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
362     // TODO: selectShiftedRegister should allow for rotates on logical shifts.
363     // For now, make them the same. The only difference between the two is that
364     // logical shifts are allowed to fold in rotates. Otherwise, these are
365     // functionally the same.
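    // For example (illustrative): logical instructions accept a rotated
    // operand, e.g.
    //   orr x0, x1, x2, ror #8
    // whereas arithmetic add/sub shifted-register forms only accept
    // LSL/LSR/ASR, which is why these two selectors may eventually diverge.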
366     return selectShiftedRegister(Root);
367   }
368 
369   /// Given an extend instruction, determine the correct shift-extend type for
370   /// that instruction.
371   ///
372   /// If the instruction is going to be used in a load or store, pass
373   /// \p IsLoadStore = true.
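  ///
  /// For example (illustrative), a G_ZEXT from s32 corresponds to UXTW, which
  /// can be folded into an extended-register operand such as:
  /// \code
  ///   add x0, x1, w2, uxtw
  /// \endcode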
374   AArch64_AM::ShiftExtendType
375   getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
376                        bool IsLoadStore = false) const;
377 
  /// Move \p Reg to \p RC if \p Reg is not already in \p RC.
379   ///
380   /// \returns Either \p Reg if no change was necessary, or the new register
381   /// created by moving \p Reg.
382   ///
383   /// Note: This uses emitCopy right now.
384   Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
385                               MachineIRBuilder &MIB) const;
386 
387   ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
388 
389   void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
390                       int OpIdx = -1) const;
391   void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
392                           int OpIdx = -1) const;
393   void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
394                           int OpIdx = -1) const;
395 
396   // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
397   void materializeLargeCMVal(MachineInstr &I, const Value *V,
398                              unsigned OpFlags) const;
399 
400   // Optimization methods.
401   bool tryOptSelect(MachineInstr &MI) const;
402   MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
403                                       MachineOperand &Predicate,
404                                       MachineIRBuilder &MIRBuilder) const;
405 
406   /// Return true if \p MI is a load or store of \p NumBytes bytes.
407   bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
408 
409   /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
410   /// register zeroed out. In other words, the result of MI has been explicitly
411   /// zero extended.
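  ///
  /// For example (illustrative), a plain 32-bit arithmetic operation such as
  /// a G_ADD qualifies: on AArch64, writes to a W register implicitly zero
  /// the upper 32 bits of the corresponding X register, so no explicit
  /// zero-extension is needed.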
412   bool isDef32(const MachineInstr &MI) const;
413 
414   const AArch64TargetMachine &TM;
415   const AArch64Subtarget &STI;
416   const AArch64InstrInfo &TII;
417   const AArch64RegisterInfo &TRI;
418   const AArch64RegisterBankInfo &RBI;
419 
420   bool ProduceNonFlagSettingCondBr = false;
421 
422   // Some cached values used during selection.
423   // We use LR as a live-in register, and we keep track of it here as it can be
424   // clobbered by calls.
425   Register MFReturnAddr;
426 
427 #define GET_GLOBALISEL_PREDICATES_DECL
428 #include "AArch64GenGlobalISel.inc"
429 #undef GET_GLOBALISEL_PREDICATES_DECL
430 
431 // We declare the temporaries used by selectImpl() in the class to minimize the
432 // cost of constructing placeholder values.
433 #define GET_GLOBALISEL_TEMPORARIES_DECL
434 #include "AArch64GenGlobalISel.inc"
435 #undef GET_GLOBALISEL_TEMPORARIES_DECL
436 };
437 
438 } // end anonymous namespace
439 
440 #define GET_GLOBALISEL_IMPL
441 #include "AArch64GenGlobalISel.inc"
442 #undef GET_GLOBALISEL_IMPL
443 
444 AArch64InstructionSelector::AArch64InstructionSelector(
445     const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
446     const AArch64RegisterBankInfo &RBI)
447     : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
448       TRI(*STI.getRegisterInfo()), RBI(RBI),
449 #define GET_GLOBALISEL_PREDICATES_INIT
450 #include "AArch64GenGlobalISel.inc"
451 #undef GET_GLOBALISEL_PREDICATES_INIT
452 #define GET_GLOBALISEL_TEMPORARIES_INIT
453 #include "AArch64GenGlobalISel.inc"
454 #undef GET_GLOBALISEL_TEMPORARIES_INIT
455 {
456 }
457 
458 // FIXME: This should be target-independent, inferred from the types declared
459 // for each class in the bank.
460 static const TargetRegisterClass *
461 getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
462                          const RegisterBankInfo &RBI,
463                          bool GetAllRegSet = false) {
464   if (RB.getID() == AArch64::GPRRegBankID) {
465     if (Ty.getSizeInBits() <= 32)
466       return GetAllRegSet ? &AArch64::GPR32allRegClass
467                           : &AArch64::GPR32RegClass;
468     if (Ty.getSizeInBits() == 64)
469       return GetAllRegSet ? &AArch64::GPR64allRegClass
470                           : &AArch64::GPR64RegClass;
471     return nullptr;
472   }
473 
474   if (RB.getID() == AArch64::FPRRegBankID) {
475     if (Ty.getSizeInBits() <= 16)
476       return &AArch64::FPR16RegClass;
477     if (Ty.getSizeInBits() == 32)
478       return &AArch64::FPR32RegClass;
479     if (Ty.getSizeInBits() == 64)
480       return &AArch64::FPR64RegClass;
481     if (Ty.getSizeInBits() == 128)
482       return &AArch64::FPR128RegClass;
483     return nullptr;
484   }
485 
486   return nullptr;
487 }
488 
/// Given a register bank and a size in bits, return the smallest register
/// class that can represent that combination.
491 static const TargetRegisterClass *
492 getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
493                       bool GetAllRegSet = false) {
494   unsigned RegBankID = RB.getID();
495 
496   if (RegBankID == AArch64::GPRRegBankID) {
497     if (SizeInBits <= 32)
498       return GetAllRegSet ? &AArch64::GPR32allRegClass
499                           : &AArch64::GPR32RegClass;
500     if (SizeInBits == 64)
501       return GetAllRegSet ? &AArch64::GPR64allRegClass
502                           : &AArch64::GPR64RegClass;
503   }
504 
505   if (RegBankID == AArch64::FPRRegBankID) {
506     switch (SizeInBits) {
507     default:
508       return nullptr;
509     case 8:
510       return &AArch64::FPR8RegClass;
511     case 16:
512       return &AArch64::FPR16RegClass;
513     case 32:
514       return &AArch64::FPR32RegClass;
515     case 64:
516       return &AArch64::FPR64RegClass;
517     case 128:
518       return &AArch64::FPR128RegClass;
519     }
520   }
521 
522   return nullptr;
523 }
524 
/// Returns true and sets \p SubReg to the correct subregister for the given
/// register class, or false if no suitable subregister exists.
526 static bool getSubRegForClass(const TargetRegisterClass *RC,
527                               const TargetRegisterInfo &TRI, unsigned &SubReg) {
528   switch (TRI.getRegSizeInBits(*RC)) {
529   case 8:
530     SubReg = AArch64::bsub;
531     break;
532   case 16:
533     SubReg = AArch64::hsub;
534     break;
535   case 32:
536     if (RC != &AArch64::FPR32RegClass)
537       SubReg = AArch64::sub_32;
538     else
539       SubReg = AArch64::ssub;
540     break;
541   case 64:
542     SubReg = AArch64::dsub;
543     break;
544   default:
545     LLVM_DEBUG(
546         dbgs() << "Couldn't find appropriate subregister for register class.");
547     return false;
548   }
549 
550   return true;
551 }
552 
553 /// Returns the minimum size the given register bank can hold.
554 static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
555   switch (RB.getID()) {
556   case AArch64::GPRRegBankID:
557     return 32;
558   case AArch64::FPRRegBankID:
559     return 8;
560   default:
561     llvm_unreachable("Tried to get minimum size for unknown register bank.");
562   }
563 }
564 
565 static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
566   auto &MI = *Root.getParent();
567   auto &MBB = *MI.getParent();
568   auto &MF = *MBB.getParent();
569   auto &MRI = MF.getRegInfo();
570   uint64_t Immed;
571   if (Root.isImm())
572     Immed = Root.getImm();
573   else if (Root.isCImm())
574     Immed = Root.getCImm()->getZExtValue();
575   else if (Root.isReg()) {
576     auto ValAndVReg =
577         getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
578     if (!ValAndVReg)
579       return None;
580     Immed = ValAndVReg->Value.getSExtValue();
581   } else
582     return None;
583   return Immed;
584 }
585 
/// Check whether \p I is a currently unsupported binary operation:
/// - it has an unsized type
/// - an operand is not a vreg
/// - its operands are not all in the same bank
/// These are checks that should someday live in the verifier, but right now
/// they are mostly limitations of the AArch64 selector.
592 static bool unsupportedBinOp(const MachineInstr &I,
593                              const AArch64RegisterBankInfo &RBI,
594                              const MachineRegisterInfo &MRI,
595                              const AArch64RegisterInfo &TRI) {
596   LLT Ty = MRI.getType(I.getOperand(0).getReg());
597   if (!Ty.isValid()) {
598     LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
599     return true;
600   }
601 
602   const RegisterBank *PrevOpBank = nullptr;
603   for (auto &MO : I.operands()) {
604     // FIXME: Support non-register operands.
605     if (!MO.isReg()) {
606       LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
607       return true;
608     }
609 
    // FIXME: Can generic operations have physical register operands? If
611     // so, this will need to be taught about that, and we'll need to get the
612     // bank out of the minimal class for the register.
613     // Either way, this needs to be documented (and possibly verified).
614     if (!Register::isVirtualRegister(MO.getReg())) {
615       LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
616       return true;
617     }
618 
619     const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
620     if (!OpBank) {
621       LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
622       return true;
623     }
624 
625     if (PrevOpBank && OpBank != PrevOpBank) {
626       LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
627       return true;
628     }
629     PrevOpBank = OpBank;
630   }
631   return false;
632 }
633 
634 /// Select the AArch64 opcode for the basic binary operation \p GenericOpc
635 /// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
636 /// and of size \p OpSize.
637 /// \returns \p GenericOpc if the combination is unsupported.
638 static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
639                                unsigned OpSize) {
640   switch (RegBankID) {
641   case AArch64::GPRRegBankID:
642     if (OpSize == 32) {
643       switch (GenericOpc) {
644       case TargetOpcode::G_SHL:
645         return AArch64::LSLVWr;
646       case TargetOpcode::G_LSHR:
647         return AArch64::LSRVWr;
648       case TargetOpcode::G_ASHR:
649         return AArch64::ASRVWr;
650       default:
651         return GenericOpc;
652       }
653     } else if (OpSize == 64) {
654       switch (GenericOpc) {
655       case TargetOpcode::G_PTR_ADD:
656         return AArch64::ADDXrr;
657       case TargetOpcode::G_SHL:
658         return AArch64::LSLVXr;
659       case TargetOpcode::G_LSHR:
660         return AArch64::LSRVXr;
661       case TargetOpcode::G_ASHR:
662         return AArch64::ASRVXr;
663       default:
664         return GenericOpc;
665       }
666     }
667     break;
668   case AArch64::FPRRegBankID:
669     switch (OpSize) {
670     case 32:
671       switch (GenericOpc) {
672       case TargetOpcode::G_FADD:
673         return AArch64::FADDSrr;
674       case TargetOpcode::G_FSUB:
675         return AArch64::FSUBSrr;
676       case TargetOpcode::G_FMUL:
677         return AArch64::FMULSrr;
678       case TargetOpcode::G_FDIV:
679         return AArch64::FDIVSrr;
680       default:
681         return GenericOpc;
682       }
683     case 64:
684       switch (GenericOpc) {
685       case TargetOpcode::G_FADD:
686         return AArch64::FADDDrr;
687       case TargetOpcode::G_FSUB:
688         return AArch64::FSUBDrr;
689       case TargetOpcode::G_FMUL:
690         return AArch64::FMULDrr;
691       case TargetOpcode::G_FDIV:
692         return AArch64::FDIVDrr;
693       case TargetOpcode::G_OR:
694         return AArch64::ORRv8i8;
695       default:
696         return GenericOpc;
697       }
698     }
699     break;
700   }
701   return GenericOpc;
702 }
703 
704 /// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
705 /// appropriate for the (value) register bank \p RegBankID and of memory access
706 /// size \p OpSize.  This returns the variant with the base+unsigned-immediate
707 /// addressing mode (e.g., LDRXui).
708 /// \returns \p GenericOpc if the combination is unsupported.
709 static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
710                                     unsigned OpSize) {
711   const bool isStore = GenericOpc == TargetOpcode::G_STORE;
712   switch (RegBankID) {
713   case AArch64::GPRRegBankID:
714     switch (OpSize) {
715     case 8:
716       return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
717     case 16:
718       return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
719     case 32:
720       return isStore ? AArch64::STRWui : AArch64::LDRWui;
721     case 64:
722       return isStore ? AArch64::STRXui : AArch64::LDRXui;
723     }
724     break;
725   case AArch64::FPRRegBankID:
726     switch (OpSize) {
727     case 8:
728       return isStore ? AArch64::STRBui : AArch64::LDRBui;
729     case 16:
730       return isStore ? AArch64::STRHui : AArch64::LDRHui;
731     case 32:
732       return isStore ? AArch64::STRSui : AArch64::LDRSui;
733     case 64:
734       return isStore ? AArch64::STRDui : AArch64::LDRDui;
735     }
736     break;
737   }
738   return GenericOpc;
739 }
740 
741 #ifndef NDEBUG
742 /// Helper function that verifies that we have a valid copy at the end of
743 /// selectCopy. Verifies that the source and dest have the expected sizes and
744 /// then returns true.
745 static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
746                         const MachineRegisterInfo &MRI,
747                         const TargetRegisterInfo &TRI,
748                         const RegisterBankInfo &RBI) {
749   const Register DstReg = I.getOperand(0).getReg();
750   const Register SrcReg = I.getOperand(1).getReg();
751   const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
752   const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
753 
754   // Make sure the size of the source and dest line up.
755   assert(
756       (DstSize == SrcSize ||
       // Copies are a means to set up initial types; the number of
       // bits may not exactly match.
759        (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
       // Copies are a means to move bits around; as long as we are
       // on the same register class, that's fine. Otherwise, we need
       // a SUBREG_TO_REG, an AND, or similar.
763        (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
764       "Copy with different width?!");
765 
766   // Check the size of the destination.
767   assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
768          "GPRs cannot get more than 64-bit width values");
769 
770   return true;
771 }
772 #endif
773 
774 /// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
775 /// to \p *To.
776 ///
777 /// E.g "To = COPY SrcReg:SubReg"
778 static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
779                        const RegisterBankInfo &RBI, Register SrcReg,
780                        const TargetRegisterClass *To, unsigned SubReg) {
781   assert(SrcReg.isValid() && "Expected a valid source register?");
782   assert(To && "Destination register class cannot be null");
783   assert(SubReg && "Expected a valid subregister");
784 
785   MachineIRBuilder MIB(I);
786   auto SubRegCopy =
787       MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
788   MachineOperand &RegOp = I.getOperand(1);
789   RegOp.setReg(SubRegCopy.getReg(0));
790 
791   // It's possible that the destination register won't be constrained. Make
792   // sure that happens.
793   if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
794     RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
795 
796   return true;
797 }
798 
799 /// Helper function to get the source and destination register classes for a
800 /// copy. Returns a std::pair containing the source register class for the
801 /// copy, and the destination register class for the copy. If a register class
802 /// cannot be determined, then it will be nullptr.
803 static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
804 getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
805                      MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
806                      const RegisterBankInfo &RBI) {
807   Register DstReg = I.getOperand(0).getReg();
808   Register SrcReg = I.getOperand(1).getReg();
809   const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
810   const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
811   unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
812   unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
813 
814   // Special casing for cross-bank copies of s1s. We can technically represent
815   // a 1-bit value with any size of register. The minimum size for a GPR is 32
816   // bits. So, we need to put the FPR on 32 bits as well.
817   //
818   // FIXME: I'm not sure if this case holds true outside of copies. If it does,
819   // then we can pull it into the helpers that get the appropriate class for a
820   // register bank. Or make a new helper that carries along some constraint
821   // information.
822   if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
823     SrcSize = DstSize = 32;
824 
825   return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
826           getMinClassForRegBank(DstRegBank, DstSize, true)};
827 }
828 
829 static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
830                        MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
831                        const RegisterBankInfo &RBI) {
832   Register DstReg = I.getOperand(0).getReg();
833   Register SrcReg = I.getOperand(1).getReg();
834   const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
835   const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
836 
837   // Find the correct register classes for the source and destination registers.
838   const TargetRegisterClass *SrcRC;
839   const TargetRegisterClass *DstRC;
840   std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
841 
842   if (!DstRC) {
843     LLVM_DEBUG(dbgs() << "Unexpected dest size "
844                       << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
845     return false;
846   }
847 
848   // A couple helpers below, for making sure that the copy we produce is valid.
849 
850   // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
851   // to verify that the src and dst are the same size, since that's handled by
852   // the SUBREG_TO_REG.
853   bool KnownValid = false;
854 
855   // Returns true, or asserts if something we don't expect happens. Instead of
856   // returning true, we return isValidCopy() to ensure that we verify the
857   // result.
858   auto CheckCopy = [&]() {
859     // If we have a bitcast or something, we can't have physical registers.
860     assert((I.isCopy() ||
861             (!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
862              !Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
863            "No phys reg on generic operator!");
864     bool ValidCopy = true;
865 #ifndef NDEBUG
866     ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI);
867     assert(ValidCopy && "Invalid copy.");
868     (void)KnownValid;
869 #endif
870     return ValidCopy;
871   };
872 
873   // Is this a copy? If so, then we may need to insert a subregister copy.
874   if (I.isCopy()) {
875     // Yes. Check if there's anything to fix up.
876     if (!SrcRC) {
877       LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
878       return false;
879     }
880 
881     unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
882     unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
883     unsigned SubReg;
884 
885     // If the source bank doesn't support a subregister copy small enough,
886     // then we first need to copy to the destination bank.
887     if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
888       const TargetRegisterClass *DstTempRC =
889           getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
890       getSubRegForClass(DstRC, TRI, SubReg);
891 
892       MachineIRBuilder MIB(I);
893       auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
894       copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
895     } else if (SrcSize > DstSize) {
896       // If the source register is bigger than the destination we need to
897       // perform a subregister copy.
898       const TargetRegisterClass *SubRegRC =
899           getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
900       getSubRegForClass(SubRegRC, TRI, SubReg);
901       copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
902     } else if (DstSize > SrcSize) {
903       // If the destination register is bigger than the source we need to do
904       // a promotion using SUBREG_TO_REG.
905       const TargetRegisterClass *PromotionRC =
906           getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
907       getSubRegForClass(SrcRC, TRI, SubReg);
908 
909       Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
910       BuildMI(*I.getParent(), I, I.getDebugLoc(),
911               TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
912           .addImm(0)
913           .addUse(SrcReg)
914           .addImm(SubReg);
915       MachineOperand &RegOp = I.getOperand(1);
916       RegOp.setReg(PromoteReg);
917 
918       // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
919       KnownValid = true;
920     }
921 
922     // If the destination is a physical register, then there's nothing to
923     // change, so we're done.
924     if (Register::isPhysicalRegister(DstReg))
925       return CheckCopy();
926   }
927 
  // No need to constrain SrcReg. It will get constrained when we hit another
  // of its uses or defs. Copies do not have constraints.
930   if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
931     LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
932                       << " operand\n");
933     return false;
934   }
935   I.setDesc(TII.get(AArch64::COPY));
936   return CheckCopy();
937 }
938 
939 static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
940   if (!DstTy.isScalar() || !SrcTy.isScalar())
941     return GenericOpc;
942 
943   const unsigned DstSize = DstTy.getSizeInBits();
944   const unsigned SrcSize = SrcTy.getSizeInBits();
945 
946   switch (DstSize) {
947   case 32:
948     switch (SrcSize) {
949     case 32:
950       switch (GenericOpc) {
951       case TargetOpcode::G_SITOFP:
952         return AArch64::SCVTFUWSri;
953       case TargetOpcode::G_UITOFP:
954         return AArch64::UCVTFUWSri;
955       case TargetOpcode::G_FPTOSI:
956         return AArch64::FCVTZSUWSr;
957       case TargetOpcode::G_FPTOUI:
958         return AArch64::FCVTZUUWSr;
959       default:
960         return GenericOpc;
961       }
962     case 64:
963       switch (GenericOpc) {
964       case TargetOpcode::G_SITOFP:
965         return AArch64::SCVTFUXSri;
966       case TargetOpcode::G_UITOFP:
967         return AArch64::UCVTFUXSri;
968       case TargetOpcode::G_FPTOSI:
969         return AArch64::FCVTZSUWDr;
970       case TargetOpcode::G_FPTOUI:
971         return AArch64::FCVTZUUWDr;
972       default:
973         return GenericOpc;
974       }
975     default:
976       return GenericOpc;
977     }
978   case 64:
979     switch (SrcSize) {
980     case 32:
981       switch (GenericOpc) {
982       case TargetOpcode::G_SITOFP:
983         return AArch64::SCVTFUWDri;
984       case TargetOpcode::G_UITOFP:
985         return AArch64::UCVTFUWDri;
986       case TargetOpcode::G_FPTOSI:
987         return AArch64::FCVTZSUXSr;
988       case TargetOpcode::G_FPTOUI:
989         return AArch64::FCVTZUUXSr;
990       default:
991         return GenericOpc;
992       }
993     case 64:
994       switch (GenericOpc) {
995       case TargetOpcode::G_SITOFP:
996         return AArch64::SCVTFUXDri;
997       case TargetOpcode::G_UITOFP:
998         return AArch64::UCVTFUXDri;
999       case TargetOpcode::G_FPTOSI:
1000         return AArch64::FCVTZSUXDr;
1001       case TargetOpcode::G_FPTOUI:
1002         return AArch64::FCVTZUUXDr;
1003       default:
1004         return GenericOpc;
1005       }
1006     default:
1007       return GenericOpc;
1008     }
1009   default:
1010     return GenericOpc;
1011   };
1012   return GenericOpc;
1013 }
1014 
1015 MachineInstr *
1016 AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1017                                        Register False, AArch64CC::CondCode CC,
1018                                        MachineIRBuilder &MIB) const {
1019   MachineRegisterInfo &MRI = *MIB.getMRI();
1020   assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
1021              RBI.getRegBank(True, MRI, TRI)->getID() &&
1022          "Expected both select operands to have the same regbank?");
1023   LLT Ty = MRI.getType(True);
1024   if (Ty.isVector())
1025     return nullptr;
1026   const unsigned Size = Ty.getSizeInBits();
1027   assert((Size == 32 || Size == 64) &&
1028          "Expected 32 bit or 64 bit select only?");
1029   const bool Is32Bit = Size == 32;
1030   if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1031     unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1032     auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1033     constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1034     return &*FCSel;
1035   }
1036 
1037   // By default, we'll try and emit a CSEL.
1038   unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1039   bool Optimized = false;
1040   auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1041                                  &Optimized](Register &Reg, Register &OtherReg,
1042                                              bool Invert) {
1043     if (Optimized)
1044       return false;
1045 
1046     // Attempt to fold:
1047     //
1048     // %sub = G_SUB 0, %x
1049     // %select = G_SELECT cc, %reg, %sub
1050     //
1051     // Into:
1052     // %select = CSNEG %reg, %x, cc
1053     Register MatchReg;
1054     if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1055       Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1056       Reg = MatchReg;
1057       if (Invert) {
1058         CC = AArch64CC::getInvertedCondCode(CC);
1059         std::swap(Reg, OtherReg);
1060       }
1061       return true;
1062     }
1063 
1064     // Attempt to fold:
1065     //
1066     // %xor = G_XOR %x, -1
1067     // %select = G_SELECT cc, %reg, %xor
1068     //
1069     // Into:
1070     // %select = CSINV %reg, %x, cc
1071     if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1072       Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1073       Reg = MatchReg;
1074       if (Invert) {
1075         CC = AArch64CC::getInvertedCondCode(CC);
1076         std::swap(Reg, OtherReg);
1077       }
1078       return true;
1079     }
1080 
1081     // Attempt to fold:
1082     //
1083     // %add = G_ADD %x, 1
1084     // %select = G_SELECT cc, %reg, %add
1085     //
1086     // Into:
1087     // %select = CSINC %reg, %x, cc
1088     if (mi_match(Reg, MRI, m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)))) {
1089       Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1090       Reg = MatchReg;
1091       if (Invert) {
1092         CC = AArch64CC::getInvertedCondCode(CC);
1093         std::swap(Reg, OtherReg);
1094       }
1095       return true;
1096     }
1097 
1098     return false;
1099   };
1100 
1101   // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1102   // true/false values are constants.
1103   // FIXME: All of these patterns already exist in tablegen. We should be
1104   // able to import these.
1105   auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1106                           &Optimized]() {
1107     if (Optimized)
1108       return false;
1109     auto TrueCst = getConstantVRegValWithLookThrough(True, MRI);
1110     auto FalseCst = getConstantVRegValWithLookThrough(False, MRI);
1111     if (!TrueCst && !FalseCst)
1112       return false;
1113 
1114     Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1115     if (TrueCst && FalseCst) {
1116       int64_t T = TrueCst->Value.getSExtValue();
1117       int64_t F = FalseCst->Value.getSExtValue();
1118 
1119       if (T == 0 && F == 1) {
1120         // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1121         Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1122         True = ZReg;
1123         False = ZReg;
1124         return true;
1125       }
1126 
1127       if (T == 0 && F == -1) {
        // G_SELECT cc, 0, -1 -> CSINV zreg, zreg, cc
1129         Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1130         True = ZReg;
1131         False = ZReg;
1132         return true;
1133       }
1134     }
1135 
1136     if (TrueCst) {
1137       int64_t T = TrueCst->Value.getSExtValue();
1138       if (T == 1) {
1139         // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1140         Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1141         True = False;
1142         False = ZReg;
1143         CC = AArch64CC::getInvertedCondCode(CC);
1144         return true;
1145       }
1146 
1147       if (T == -1) {
1148         // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1149         Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1150         True = False;
1151         False = ZReg;
1152         CC = AArch64CC::getInvertedCondCode(CC);
1153         return true;
1154       }
1155     }
1156 
1157     if (FalseCst) {
1158       int64_t F = FalseCst->Value.getSExtValue();
1159       if (F == 1) {
1160         // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1161         Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1162         False = ZReg;
1163         return true;
1164       }
1165 
1166       if (F == -1) {
        // G_SELECT cc, t, -1 -> CSINV t, zreg, cc
1168         Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1169         False = ZReg;
1170         return true;
1171       }
1172     }
1173     return false;
1174   };
1175 
1176   Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1177   Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1178   Optimized |= TryOptSelectCst();
1179   auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1180   constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1181   return &*SelectInst;
1182 }
1183 
1184 static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
1185   switch (P) {
1186   default:
1187     llvm_unreachable("Unknown condition code!");
1188   case CmpInst::ICMP_NE:
1189     return AArch64CC::NE;
1190   case CmpInst::ICMP_EQ:
1191     return AArch64CC::EQ;
1192   case CmpInst::ICMP_SGT:
1193     return AArch64CC::GT;
1194   case CmpInst::ICMP_SGE:
1195     return AArch64CC::GE;
1196   case CmpInst::ICMP_SLT:
1197     return AArch64CC::LT;
1198   case CmpInst::ICMP_SLE:
1199     return AArch64CC::LE;
1200   case CmpInst::ICMP_UGT:
1201     return AArch64CC::HI;
1202   case CmpInst::ICMP_UGE:
1203     return AArch64CC::HS;
1204   case CmpInst::ICMP_ULT:
1205     return AArch64CC::LO;
1206   case CmpInst::ICMP_ULE:
1207     return AArch64CC::LS;
1208   }
1209 }
1210 
1211 static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
1212                                       AArch64CC::CondCode &CondCode,
1213                                       AArch64CC::CondCode &CondCode2) {
1214   CondCode2 = AArch64CC::AL;
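  // Some predicates need both condition codes: the result is the OR of the
  // two checks. For example (illustrative), FCMP_ONE is tested as "MI || GT",
  // so a user of this mapping has to emit a select/branch once per condition
  // code whenever CondCode2 != AL.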
1215   switch (P) {
1216   default:
1217     llvm_unreachable("Unknown FP condition!");
1218   case CmpInst::FCMP_OEQ:
1219     CondCode = AArch64CC::EQ;
1220     break;
1221   case CmpInst::FCMP_OGT:
1222     CondCode = AArch64CC::GT;
1223     break;
1224   case CmpInst::FCMP_OGE:
1225     CondCode = AArch64CC::GE;
1226     break;
1227   case CmpInst::FCMP_OLT:
1228     CondCode = AArch64CC::MI;
1229     break;
1230   case CmpInst::FCMP_OLE:
1231     CondCode = AArch64CC::LS;
1232     break;
1233   case CmpInst::FCMP_ONE:
1234     CondCode = AArch64CC::MI;
1235     CondCode2 = AArch64CC::GT;
1236     break;
1237   case CmpInst::FCMP_ORD:
1238     CondCode = AArch64CC::VC;
1239     break;
1240   case CmpInst::FCMP_UNO:
1241     CondCode = AArch64CC::VS;
1242     break;
1243   case CmpInst::FCMP_UEQ:
1244     CondCode = AArch64CC::EQ;
1245     CondCode2 = AArch64CC::VS;
1246     break;
1247   case CmpInst::FCMP_UGT:
1248     CondCode = AArch64CC::HI;
1249     break;
1250   case CmpInst::FCMP_UGE:
1251     CondCode = AArch64CC::PL;
1252     break;
1253   case CmpInst::FCMP_ULT:
1254     CondCode = AArch64CC::LT;
1255     break;
1256   case CmpInst::FCMP_ULE:
1257     CondCode = AArch64CC::LE;
1258     break;
1259   case CmpInst::FCMP_UNE:
1260     CondCode = AArch64CC::NE;
1261     break;
1262   }
1263 }
1264 
1265 /// Return a register which can be used as a bit to test in a TB(N)Z.
1266 static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1267                               MachineRegisterInfo &MRI) {
1268   assert(Reg.isValid() && "Expected valid register!");
1269   while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1270     unsigned Opc = MI->getOpcode();
1271 
1272     if (!MI->getOperand(0).isReg() ||
1273         !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1274       break;
1275 
1276     // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1277     //
1278     // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1279     // on the truncated x is the same as the bit number on x.
1280     if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1281         Opc == TargetOpcode::G_TRUNC) {
1282       Register NextReg = MI->getOperand(1).getReg();
1283       // Did we find something worth folding?
1284       if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1285         break;
1286 
1287       // NextReg is worth folding. Keep looking.
1288       Reg = NextReg;
1289       continue;
1290     }
1291 
1292     // Attempt to find a suitable operation with a constant on one side.
1293     Optional<uint64_t> C;
1294     Register TestReg;
1295     switch (Opc) {
1296     default:
1297       break;
1298     case TargetOpcode::G_AND:
1299     case TargetOpcode::G_XOR: {
1300       TestReg = MI->getOperand(1).getReg();
1301       Register ConstantReg = MI->getOperand(2).getReg();
1302       auto VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
1303       if (!VRegAndVal) {
1304         // AND commutes, check the other side for a constant.
1305         // FIXME: Can we canonicalize the constant so that it's always on the
1306         // same side at some point earlier?
1307         std::swap(ConstantReg, TestReg);
1308         VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
1309       }
1310       if (VRegAndVal)
1311         C = VRegAndVal->Value.getSExtValue();
1312       break;
1313     }
1314     case TargetOpcode::G_ASHR:
1315     case TargetOpcode::G_LSHR:
1316     case TargetOpcode::G_SHL: {
1317       TestReg = MI->getOperand(1).getReg();
1318       auto VRegAndVal =
1319           getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1320       if (VRegAndVal)
1321         C = VRegAndVal->Value.getSExtValue();
1322       break;
1323     }
1324     }
1325 
1326     // Didn't find a constant or viable register. Bail out of the loop.
1327     if (!C || !TestReg.isValid())
1328       break;
1329 
1330     // We found a suitable instruction with a constant. Check to see if we can
1331     // walk through the instruction.
1332     Register NextReg;
1333     unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1334     switch (Opc) {
1335     default:
1336       break;
1337     case TargetOpcode::G_AND:
1338       // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1339       if ((*C >> Bit) & 1)
1340         NextReg = TestReg;
1341       break;
1342     case TargetOpcode::G_SHL:
      // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is non-negative and fits
      // in the type of the register.
1345       if (*C <= Bit && (Bit - *C) < TestRegSize) {
1346         NextReg = TestReg;
1347         Bit = Bit - *C;
1348       }
1349       break;
1350     case TargetOpcode::G_ASHR:
1351       // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1352       // in x
1353       NextReg = TestReg;
1354       Bit = Bit + *C;
1355       if (Bit >= TestRegSize)
1356         Bit = TestRegSize - 1;
1357       break;
1358     case TargetOpcode::G_LSHR:
1359       // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1360       if ((Bit + *C) < TestRegSize) {
1361         NextReg = TestReg;
1362         Bit = Bit + *C;
1363       }
1364       break;
1365     case TargetOpcode::G_XOR:
1366       // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1367       // appropriate.
1368       //
1369       // e.g. If x' = xor x, c, and the b-th bit is set in c then
1370       //
1371       // tbz x', b -> tbnz x, b
1372       //
1373       // Because x' only has the b-th bit set if x does not.
1374       if ((*C >> Bit) & 1)
1375         Invert = !Invert;
1376       NextReg = TestReg;
1377       break;
1378     }
1379 
1380     // Check if we found anything worth folding.
1381     if (!NextReg.isValid())
1382       return Reg;
1383     Reg = NextReg;
1384   }
1385 
1386   return Reg;
1387 }
1388 
1389 MachineInstr *AArch64InstructionSelector::emitTestBit(
1390     Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1391     MachineIRBuilder &MIB) const {
1392   assert(TestReg.isValid());
1393   assert(ProduceNonFlagSettingCondBr &&
1394          "Cannot emit TB(N)Z with speculation tracking!");
1395   MachineRegisterInfo &MRI = *MIB.getMRI();
1396 
1397   // Attempt to optimize the test bit by walking over instructions.
1398   TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1399   LLT Ty = MRI.getType(TestReg);
1400   unsigned Size = Ty.getSizeInBits();
1401   assert(!Ty.isVector() && "Expected a scalar!");
1402   assert(Bit < 64 && "Bit is too large!");
1403 
1404   // TB(N)ZW requires a 32-bit register and TB(N)ZX a 64-bit one, so copy the
1405   // test register into the appropriately sized register class if needed.
1406   bool UseWReg = Bit < 32;
1407   unsigned NecessarySize = UseWReg ? 32 : 64;
1408   if (Size != NecessarySize)
1409     TestReg = moveScalarRegClass(
1410         TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1411         MIB);
1412 
1413   static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1414                                           {AArch64::TBZW, AArch64::TBNZW}};
1415   unsigned Opc = OpcTable[UseWReg][IsNegative];
1416   auto TestBitMI =
1417       MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1418   constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1419   return &*TestBitMI;
1420 }
1421 
1422 bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1423     MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1424     MachineIRBuilder &MIB) const {
1425   assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1426   // Given something like this:
1427   //
1428   //  %x = ...Something...
1429   //  %one = G_CONSTANT i64 1
1430   //  %zero = G_CONSTANT i64 0
1431   //  %and = G_AND %x, %one
1432   //  %cmp = G_ICMP intpred(ne), %and, %zero
1433   //  %cmp_trunc = G_TRUNC %cmp
1434   //  G_BRCOND %cmp_trunc, %bb.3
1435   //
1436   // We want to try and fold the AND into the G_BRCOND and produce either a
1437   // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1438   //
1439   // In this case, we'd get
1440   //
1441   // TBNZ %x %bb.3
1442   //
1443 
1444   // Check if the AND has a constant on its RHS which we can use as a mask.
1445   // If it's a power of 2, then it's the same as checking a specific bit.
1446   // (e.g., ANDing with 8 == ANDing with 000...1000 == testing if bit 3 is set)
1447   auto MaybeBit = getConstantVRegValWithLookThrough(
1448       AndInst.getOperand(2).getReg(), *MIB.getMRI());
1449   if (!MaybeBit)
1450     return false;
1451 
1452   int32_t Bit = MaybeBit->Value.exactLogBase2();
1453   if (Bit < 0)
1454     return false;
1455 
1456   Register TestReg = AndInst.getOperand(1).getReg();
1457 
1458   // Emit a TB(N)Z.
1459   emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1460   return true;
1461 }
1462 
1463 MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1464                                                   bool IsNegative,
1465                                                   MachineBasicBlock *DestMBB,
1466                                                   MachineIRBuilder &MIB) const {
1467   assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1468   MachineRegisterInfo &MRI = *MIB.getMRI();
1469   assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1470              AArch64::GPRRegBankID &&
1471          "Expected GPRs only?");
1472   auto Ty = MRI.getType(CompareReg);
1473   unsigned Width = Ty.getSizeInBits();
1474   assert(!Ty.isVector() && "Expected scalar only?");
1475   assert(Width <= 64 && "Expected width to be at most 64?");
1476   static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1477                                           {AArch64::CBNZW, AArch64::CBNZX}};
1478   unsigned Opc = OpcTable[IsNegative][Width == 64];
1479   auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1480   constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1481   return &*BranchMI;
1482 }
1483 
1484 bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1485     MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1486   assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1487   assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1488   // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1489   // totally clean.  Some of them require two branches to implement.
1490   auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1491   emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1492                 Pred);
1493   AArch64CC::CondCode CC1, CC2;
1494   changeFCMPPredToAArch64CC(Pred, CC1, CC2);
1495   MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1496   MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1497   if (CC2 != AArch64CC::AL)
1498     MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1499   I.eraseFromParent();
1500   return true;
1501 }
1502 
1503 bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1504     MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1505   assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1506   assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1507   // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1508   //
1509   // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1510   // instructions will not be produced, as they are conditional branch
1511   // instructions that do not set flags.
1512   if (!ProduceNonFlagSettingCondBr)
1513     return false;
1514 
1515   MachineRegisterInfo &MRI = *MIB.getMRI();
1516   MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1517   auto Pred =
1518       static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1519   Register LHS = ICmp.getOperand(2).getReg();
1520   Register RHS = ICmp.getOperand(3).getReg();
1521 
1522   // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1523   auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
1524   MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1525 
1526   // When we can emit a TB(N)Z, prefer that.
1527   //
1528   // Handle non-commutative condition codes first.
1529   // Note that we don't want to do this when we have a G_AND because it can
1530   // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1531   if (VRegAndVal && !AndInst) {
1532     int64_t C = VRegAndVal->Value.getSExtValue();
1533 
1534     // A signed greater-than comparison against -1 is true iff the msb of the
1535     // LHS is zero, so we can just test that bit.
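         // e.g. %c = G_ICMP intpred(sgt), %x:s64, -1; G_BRCOND %c, %bb.1
         //      can be selected as TBZX %x, 63, %bb.1.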
1536     if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1537       uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1538       emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1539       I.eraseFromParent();
1540       return true;
1541     }
1542 
1543     // A signed less-than comparison against 0 is true iff the msb of the LHS
1544     // is set, so we can just test that bit.
1545     if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1546       uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1547       emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1548       I.eraseFromParent();
1549       return true;
1550     }
1551   }
1552 
1553   // Attempt to handle commutative condition codes. Right now, that's only
1554   // eq/ne.
1555   if (ICmpInst::isEquality(Pred)) {
1556     if (!VRegAndVal) {
1557       std::swap(RHS, LHS);
1558       VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
1559       AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1560     }
1561 
1562     if (VRegAndVal && VRegAndVal->Value == 0) {
1563       // If there's a G_AND feeding into this branch, try to fold it away by
1564       // emitting a TB(N)Z instead.
1565       //
1566       // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1567       // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1568       // would be redundant.
1569       if (AndInst &&
1570           tryOptAndIntoCompareBranch(
1571               *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1572         I.eraseFromParent();
1573         return true;
1574       }
1575 
1576       // Otherwise, try to emit a CB(N)Z instead.
1577       auto LHSTy = MRI.getType(LHS);
1578       if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1579         emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1580         I.eraseFromParent();
1581         return true;
1582       }
1583     }
1584   }
1585 
1586   return false;
1587 }
1588 
1589 bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1590     MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1591   assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1592   assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1593   if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1594     return true;
1595 
1596   // Couldn't optimize. Emit a compare + a Bcc.
1597   MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1598   auto PredOp = ICmp.getOperand(1);
1599   emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1600   const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1601       static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
1602   MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1603   I.eraseFromParent();
1604   return true;
1605 }
1606 
1607 bool AArch64InstructionSelector::selectCompareBranch(
1608     MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1609   Register CondReg = I.getOperand(0).getReg();
1610   MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1611   if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) {
1612     CondReg = CCMI->getOperand(1).getReg();
1613     CCMI = MRI.getVRegDef(CondReg);
1614   }
1615 
1616   // Try to select the G_BRCOND using whatever is feeding the condition if
1617   // possible.
1618   MachineIRBuilder MIB(I);
1619   unsigned CCMIOpc = CCMI->getOpcode();
1620   if (CCMIOpc == TargetOpcode::G_FCMP)
1621     return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1622   if (CCMIOpc == TargetOpcode::G_ICMP)
1623     return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1624 
1625   // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1626   // instructions will not be produced, as they are conditional branch
1627   // instructions that do not set flags.
1628   if (ProduceNonFlagSettingCondBr) {
1629     emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1630                 I.getOperand(1).getMBB(), MIB);
1631     I.eraseFromParent();
1632     return true;
1633   }
1634 
1635   // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1636   auto TstMI =
1637       MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1638   constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1639   auto Bcc = MIB.buildInstr(AArch64::Bcc)
1640                  .addImm(AArch64CC::NE)
1641                  .addMBB(I.getOperand(1).getMBB());
1642   I.eraseFromParent();
1643   return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1644 }
1645 
1646 /// Returns the element immediate value of a vector shift operand if found.
1647 /// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
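     /// e.g. a G_BUILD_VECTOR whose operands are all the same constant yields
     /// that constant's value.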
1648 static Optional<int64_t> getVectorShiftImm(Register Reg,
1649                                            MachineRegisterInfo &MRI) {
1650   assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1651   MachineInstr *OpMI = MRI.getVRegDef(Reg);
1652   assert(OpMI && "Expected to find a vreg def for vector shift operand");
1653   if (OpMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR)
1654     return None;
1655 
1656   // Check all operands are identical immediates.
1657   int64_t ImmVal = 0;
1658   for (unsigned Idx = 1; Idx < OpMI->getNumOperands(); ++Idx) {
1659     auto VRegAndVal = getConstantVRegValWithLookThrough(
             OpMI->getOperand(Idx).getReg(), MRI);
1660     if (!VRegAndVal)
1661       return None;
1662 
1663     if (Idx == 1)
1664       ImmVal = VRegAndVal->Value.getSExtValue();
1665     if (ImmVal != VRegAndVal->Value.getSExtValue())
1666       return None;
1667   }
1668 
1669   return ImmVal;
1670 }
1671 
1672 /// Matches and returns the shift immediate value for a SHL instruction given
1673 /// a shift operand.
1674 static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg,
                                              MachineRegisterInfo &MRI) {
1675   Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1676   if (!ShiftImm)
1677     return None;
1678   // Check the immediate is in range for a SHL.
1679   int64_t Imm = *ShiftImm;
1680   if (Imm < 0)
1681     return None;
1682   switch (SrcTy.getElementType().getSizeInBits()) {
1683   default:
1684     LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift\n");
1685     return None;
1686   case 8:
1687     if (Imm > 7)
1688       return None;
1689     break;
1690   case 16:
1691     if (Imm > 15)
1692       return None;
1693     break;
1694   case 32:
1695     if (Imm > 31)
1696       return None;
1697     break;
1698   case 64:
1699     if (Imm > 63)
1700       return None;
1701     break;
1702   }
1703   return Imm;
1704 }
1705 
1706 bool AArch64InstructionSelector::selectVectorSHL(
1707     MachineInstr &I, MachineRegisterInfo &MRI) const {
1708   assert(I.getOpcode() == TargetOpcode::G_SHL);
1709   Register DstReg = I.getOperand(0).getReg();
1710   const LLT Ty = MRI.getType(DstReg);
1711   Register Src1Reg = I.getOperand(1).getReg();
1712   Register Src2Reg = I.getOperand(2).getReg();
1713 
1714   if (!Ty.isVector())
1715     return false;
1716 
1717   // Check if we have a vector of constants on RHS that we can select as the
1718   // immediate form.
1719   Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1720 
1721   unsigned Opc = 0;
1722   if (Ty == LLT::vector(2, 64)) {
1723     Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1724   } else if (Ty == LLT::vector(4, 32)) {
1725     Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1726   } else if (Ty == LLT::vector(2, 32)) {
1727     Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1728   } else if (Ty == LLT::vector(4, 16)) {
1729     Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1730   } else if (Ty == LLT::vector(8, 16)) {
1731     Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1732   } else if (Ty == LLT::vector(16, 8)) {
1733     Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1734   } else if (Ty == LLT::vector(8, 8)) {
1735     Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1736   } else {
1737     LLVM_DEBUG(dbgs() << "Unhandled G_SHL type\n");
1738     return false;
1739   }
1740 
1741   MachineIRBuilder MIB(I);
1742   auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1743   if (ImmVal)
1744     Shl.addImm(*ImmVal);
1745   else
1746     Shl.addUse(Src2Reg);
1747   constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1748   I.eraseFromParent();
1749   return true;
1750 }
1751 
1752 bool AArch64InstructionSelector::selectVectorAshrLshr(
1753     MachineInstr &I, MachineRegisterInfo &MRI) const {
1754   assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1755          I.getOpcode() == TargetOpcode::G_LSHR);
1756   Register DstReg = I.getOperand(0).getReg();
1757   const LLT Ty = MRI.getType(DstReg);
1758   Register Src1Reg = I.getOperand(1).getReg();
1759   Register Src2Reg = I.getOperand(2).getReg();
1760 
1761   if (!Ty.isVector())
1762     return false;
1763 
1764   bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1765 
1766   // We expect the immediate case to be lowered in the PostLegalCombiner to
1767   // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1768 
1769   // There is no vector shift-right-by-register instruction; instead, the
1770   // shift-left-by-register instruction takes a signed amount, where a negative
1771   // amount specifies a right shift.
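       // So a right shift is emitted as a NEG of the shift amount followed by
       // an SSHL (arithmetic) or USHL (logical), e.g. for <4 x s32>:
       //   %neg = NEGv4i32 %amt
       //   %dst = SSHLv4i32 %src, %neg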
1772 
1773   unsigned Opc = 0;
1774   unsigned NegOpc = 0;
1775   const TargetRegisterClass *RC =
1776       getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI);
1777   if (Ty == LLT::vector(2, 64)) {
1778     Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1779     NegOpc = AArch64::NEGv2i64;
1780   } else if (Ty == LLT::vector(4, 32)) {
1781     Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1782     NegOpc = AArch64::NEGv4i32;
1783   } else if (Ty == LLT::vector(2, 32)) {
1784     Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1785     NegOpc = AArch64::NEGv2i32;
1786   } else if (Ty == LLT::vector(4, 16)) {
1787     Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1788     NegOpc = AArch64::NEGv4i16;
1789   } else if (Ty == LLT::vector(8, 16)) {
1790     Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1791     NegOpc = AArch64::NEGv8i16;
1792   } else if (Ty == LLT::vector(16, 8)) {
1793     Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1794     NegOpc = AArch64::NEGv16i8;
1795   } else if (Ty == LLT::vector(8, 8)) {
1796     Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1797     NegOpc = AArch64::NEGv8i8;
1798   } else {
1799     LLVM_DEBUG(dbgs() << "Unhandled G_ASHR/G_LSHR type\n");
1800     return false;
1801   }
1802 
1803   MachineIRBuilder MIB(I);
1804   auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1805   constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1806   auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1807   constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1808   I.eraseFromParent();
1809   return true;
1810 }
1811 
1812 bool AArch64InstructionSelector::selectVaStartAAPCS(
1813     MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1814   return false;
1815 }
1816 
1817 bool AArch64InstructionSelector::selectVaStartDarwin(
1818     MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
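       // On Darwin, va_list is a simple char *, so va_start just stores the
       // address of the variadic stack area (the va-args frame index) into the
       // given va_list object.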
1819   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1820   Register ListReg = I.getOperand(0).getReg();
1821 
1822   Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1823 
1824   auto MIB =
1825       BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
1826           .addDef(ArgsAddrReg)
1827           .addFrameIndex(FuncInfo->getVarArgsStackIndex())
1828           .addImm(0)
1829           .addImm(0);
1830 
1831   constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1832 
1833   MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
1834             .addUse(ArgsAddrReg)
1835             .addUse(ListReg)
1836             .addImm(0)
1837             .addMemOperand(*I.memoperands_begin());
1838 
1839   constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1840   I.eraseFromParent();
1841   return true;
1842 }
1843 
1844 void AArch64InstructionSelector::materializeLargeCMVal(
1845     MachineInstr &I, const Value *V, unsigned OpFlags) const {
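       // Build the full 64-bit address with a MOVZ of bits 0-15 followed by
       // three MOVKs for bits 16-31, 32-47 and 48-63.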
1846   MachineBasicBlock &MBB = *I.getParent();
1847   MachineFunction &MF = *MBB.getParent();
1848   MachineRegisterInfo &MRI = MF.getRegInfo();
1849   MachineIRBuilder MIB(I);
1850 
1851   auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
1852   MovZ->addOperand(MF, I.getOperand(1));
1853   MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
1854                                      AArch64II::MO_NC);
1855   MovZ->addOperand(MF, MachineOperand::CreateImm(0));
1856   constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
1857 
1858   auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
1859                        Register ForceDstReg) {
1860     Register DstReg = ForceDstReg
1861                           ? ForceDstReg
1862                           : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1863     auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
1864     if (auto *GV = dyn_cast<GlobalValue>(V)) {
1865       MovI->addOperand(MF, MachineOperand::CreateGA(
1866                                GV, MovZ->getOperand(1).getOffset(), Flags));
1867     } else {
1868       MovI->addOperand(
1869           MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
1870                                        MovZ->getOperand(1).getOffset(), Flags));
1871     }
1872     MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
1873     constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
1874     return DstReg;
1875   };
1876   Register DstReg = BuildMovK(MovZ.getReg(0),
1877                               AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
1878   DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
1879   BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
1880 }
1881 
1882 bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
1883   MachineBasicBlock &MBB = *I.getParent();
1884   MachineFunction &MF = *MBB.getParent();
1885   MachineRegisterInfo &MRI = MF.getRegInfo();
1886 
1887   switch (I.getOpcode()) {
1888   case TargetOpcode::G_SHL:
1889   case TargetOpcode::G_ASHR:
1890   case TargetOpcode::G_LSHR: {
1891     // These shifts are legalized to have 64 bit shift amounts because we want
1892     // to take advantage of the existing imported selection patterns that assume
1893     // the immediates are s64s. However, if the shifted type is 32 bits and for
1894     // some reason we receive input GMIR that has an s64 shift amount that's not
1895     // a G_CONSTANT, insert a truncate so that we can still select the s32
1896     // register-register variant.
1897     Register SrcReg = I.getOperand(1).getReg();
1898     Register ShiftReg = I.getOperand(2).getReg();
1899     const LLT ShiftTy = MRI.getType(ShiftReg);
1900     const LLT SrcTy = MRI.getType(SrcReg);
1901     if (SrcTy.isVector())
1902       return false;
1903     assert(!ShiftTy.isVector() && "unexpected vector shift ty");
1904     if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
1905       return false;
1906     auto *AmtMI = MRI.getVRegDef(ShiftReg);
1907     assert(AmtMI && "could not find a vreg definition for shift amount");
1908     if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
1909       // Insert a subregister copy to implement a 64->32 trunc
1910       MachineIRBuilder MIB(I);
1911       auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
1912                        .addReg(ShiftReg, 0, AArch64::sub_32);
1913       MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1914       I.getOperand(2).setReg(Trunc.getReg(0));
1915     }
1916     return true;
1917   }
1918   case TargetOpcode::G_STORE:
1919     return contractCrossBankCopyIntoStore(I, MRI);
1920   case TargetOpcode::G_PTR_ADD:
1921     return convertPtrAddToAdd(I, MRI);
1922   case TargetOpcode::G_LOAD: {
1923     // For scalar loads of pointers, we try to convert the dest type from p0
1924     // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
1925     // conversion, this should be ok because all users should have been
1926     // selected already, so the type doesn't matter for them.
1927     Register DstReg = I.getOperand(0).getReg();
1928     const LLT DstTy = MRI.getType(DstReg);
1929     if (!DstTy.isPointer())
1930       return false;
1931     MRI.setType(DstReg, LLT::scalar(64));
1932     return true;
1933   }
1934   case AArch64::G_DUP: {
1935     // Convert a G_DUP of p0 elements to s64 elements to help selection.
1936     LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1937     if (!DstTy.getElementType().isPointer())
1938       return false;
1939     MachineIRBuilder MIB(I);
1940     auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
1941     MRI.setType(I.getOperand(0).getReg(),
1942                 DstTy.changeElementType(LLT::scalar(64)));
1943     MRI.setRegBank(NewSrc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1944     I.getOperand(1).setReg(NewSrc.getReg(0));
1945     return true;
1946   }
1947   case TargetOpcode::G_UITOFP:
1948   case TargetOpcode::G_SITOFP: {
1949     // If both source and destination regbanks are FPR, then convert the opcode
1950     // to G_SITOF/G_UITOF so that the importer can select it to an FPR variant.
1951     // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
1952     // copy.
1953     Register SrcReg = I.getOperand(1).getReg();
1954     LLT SrcTy = MRI.getType(SrcReg);
1955     LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1956     if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
1957       return false;
1958 
1959     if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
1960       if (I.getOpcode() == TargetOpcode::G_SITOFP)
1961         I.setDesc(TII.get(AArch64::G_SITOF));
1962       else
1963         I.setDesc(TII.get(AArch64::G_UITOF));
1964       return true;
1965     }
1966     return false;
1967   }
1968   default:
1969     return false;
1970   }
1971 }
1972 
1973 /// This lowering tries to look for G_PTR_ADD instructions and then converts
1974 /// them to a standard G_ADD with a COPY on the source.
1975 ///
1976 /// The motivation behind this is to expose the add semantics to the imported
1977 /// tablegen patterns. We shouldn't need to check for uses being loads/stores,
1978 /// because the selector works bottom up, uses before defs. By the time we
1979 /// end up trying to select a G_PTR_ADD, we should have already attempted to
1980 /// fold this into addressing modes and were therefore unsuccessful.
1981 bool AArch64InstructionSelector::convertPtrAddToAdd(
1982     MachineInstr &I, MachineRegisterInfo &MRI) {
1983   assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1984   Register DstReg = I.getOperand(0).getReg();
1985   Register AddOp1Reg = I.getOperand(1).getReg();
1986   const LLT PtrTy = MRI.getType(DstReg);
1987   if (PtrTy.getAddressSpace() != 0)
1988     return false;
1989 
1990   MachineIRBuilder MIB(I);
1991   const LLT CastPtrTy = PtrTy.isVector() ? LLT::vector(2, 64) : LLT::scalar(64);
1992   auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
1993   // Set regbanks on the registers.
1994   if (PtrTy.isVector())
1995     MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
1996   else
1997     MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1998 
1999   // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2000   // %dst(intty) = G_ADD %intbase, off
2001   I.setDesc(TII.get(TargetOpcode::G_ADD));
2002   MRI.setType(DstReg, CastPtrTy);
2003   I.getOperand(1).setReg(PtrToInt.getReg(0));
2004   if (!select(*PtrToInt)) {
2005     LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2006     return false;
2007   }
2008 
2009   // Also take the opportunity here to try to do some optimization.
2010   // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
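       // e.g. %off = G_SUB %zero, %x
       //      %dst = G_PTR_ADD %base, %off
       // becomes %dst = G_SUB %intbase, %x.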
2011   Register NegatedReg;
2012   if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2013     return true;
2014   I.getOperand(2).setReg(NegatedReg);
2015   I.setDesc(TII.get(TargetOpcode::G_SUB));
2016   return true;
2017 }
2018 
2019 bool AArch64InstructionSelector::earlySelectSHL(
2020     MachineInstr &I, MachineRegisterInfo &MRI) const {
2021   // We try to match the immediate variant of LSL, which is actually an alias
2022   // for a special case of UBFM. Otherwise, we fall back to the imported
2023   // selector which will match the register variant.
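       // e.g. a 32-bit G_SHL by a constant 3 becomes the LSL #3 alias encoding
       // UBFMWri %dst, %src, 29, 28 (immr = 32 - 3, imms = 31 - 3).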
2024   assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2025   const auto &MO = I.getOperand(2);
2026   auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI);
2027   if (!VRegAndVal)
2028     return false;
2029 
2030   const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2031   if (DstTy.isVector())
2032     return false;
2033   bool Is64Bit = DstTy.getSizeInBits() == 64;
2034   auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2035   auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2036   MachineIRBuilder MIB(I);
2037 
2038   if (!Imm1Fn || !Imm2Fn)
2039     return false;
2040 
2041   auto NewI =
2042       MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2043                      {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2044 
2045   for (auto &RenderFn : *Imm1Fn)
2046     RenderFn(NewI);
2047   for (auto &RenderFn : *Imm2Fn)
2048     RenderFn(NewI);
2049 
2050   I.eraseFromParent();
2051   return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2052 }
2053 
2054 bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2055     MachineInstr &I, MachineRegisterInfo &MRI) {
2056   assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2057   // If we're storing a scalar, it doesn't matter what register bank that
2058   // scalar is on. All that matters is the size.
2059   //
2060   // So, if we see something like this (with a 32-bit scalar as an example):
2061   //
2062   // %x:gpr(s32) = ... something ...
2063   // %y:fpr(s32) = COPY %x:gpr(s32)
2064   // G_STORE %y:fpr(s32)
2065   //
2066   // We can fix this up into something like this:
2067   //
2068   // G_STORE %x:gpr(s32)
2069   //
2070   // And then continue the selection process normally.
2071   Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2072   if (!DefDstReg.isValid())
2073     return false;
2074   LLT DefDstTy = MRI.getType(DefDstReg);
2075   Register StoreSrcReg = I.getOperand(0).getReg();
2076   LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2077 
2078   // If we get something strange like a physical register, then we shouldn't
2079   // go any further.
2080   if (!DefDstTy.isValid())
2081     return false;
2082 
2083   // Are the source and dst types the same size?
2084   if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2085     return false;
2086 
2087   if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2088       RBI.getRegBank(DefDstReg, MRI, TRI))
2089     return false;
2090 
2091   // We have a cross-bank copy, which is entering a store. Let's fold it.
2092   I.getOperand(0).setReg(DefDstReg);
2093   return true;
2094 }
2095 
2096 bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
2097   assert(I.getParent() && "Instruction should be in a basic block!");
2098   assert(I.getParent()->getParent() && "Instruction should be in a function!");
2099 
2100   MachineBasicBlock &MBB = *I.getParent();
2101   MachineFunction &MF = *MBB.getParent();
2102   MachineRegisterInfo &MRI = MF.getRegInfo();
2103 
2104   switch (I.getOpcode()) {
2105   case TargetOpcode::G_BR: {
2106     // If the branch jumps to the fallthrough block, don't bother emitting it.
2107     // Only do this for -O0 for a good code size improvement, because when
2108     // optimizations are enabled we want to leave this choice to
2109     // MachineBlockPlacement.
2110     bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOpt::None;
2111     if (EnableOpt || !MBB.isLayoutSuccessor(I.getOperand(0).getMBB()))
2112       return false;
2113     I.eraseFromParent();
2114     return true;
2115   }
2116   case TargetOpcode::G_SHL:
2117     return earlySelectSHL(I, MRI);
2118   case TargetOpcode::G_CONSTANT: {
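         // Rewrite a G_CONSTANT of 0 into a COPY from the zero register
         // (WZR/XZR) rather than materializing it with a mov.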
2119     bool IsZero = false;
2120     if (I.getOperand(1).isCImm())
2121       IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
2122     else if (I.getOperand(1).isImm())
2123       IsZero = I.getOperand(1).getImm() == 0;
2124 
2125     if (!IsZero)
2126       return false;
2127 
2128     Register DefReg = I.getOperand(0).getReg();
2129     LLT Ty = MRI.getType(DefReg);
2130     if (Ty.getSizeInBits() == 64) {
2131       I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2132       RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2133     } else if (Ty.getSizeInBits() == 32) {
2134       I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2135       RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2136     } else
2137       return false;
2138 
2139     I.setDesc(TII.get(TargetOpcode::COPY));
2140     return true;
2141   }
2142   default:
2143     return false;
2144   }
2145 }
2146 
2147 bool AArch64InstructionSelector::select(MachineInstr &I) {
2148   assert(I.getParent() && "Instruction should be in a basic block!");
2149   assert(I.getParent()->getParent() && "Instruction should be in a function!");
2150 
2151   MachineBasicBlock &MBB = *I.getParent();
2152   MachineFunction &MF = *MBB.getParent();
2153   MachineRegisterInfo &MRI = MF.getRegInfo();
2154 
2155   const AArch64Subtarget *Subtarget =
2156       &static_cast<const AArch64Subtarget &>(MF.getSubtarget());
2157   if (Subtarget->requiresStrictAlign()) {
2158     // We don't support this feature yet.
2159     LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2160     return false;
2161   }
2162 
2163   unsigned Opcode = I.getOpcode();
2164   // G_PHI requires the same handling as PHI.
2165   if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2166     // Certain non-generic instructions also need some special handling.
2167 
2168     if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2169       return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2170 
2171     if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2172       const Register DefReg = I.getOperand(0).getReg();
2173       const LLT DefTy = MRI.getType(DefReg);
2174 
2175       const RegClassOrRegBank &RegClassOrBank =
2176         MRI.getRegClassOrRegBank(DefReg);
2177 
2178       const TargetRegisterClass *DefRC
2179         = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
2180       if (!DefRC) {
2181         if (!DefTy.isValid()) {
2182           LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2183           return false;
2184         }
2185         const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
2186         DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
2187         if (!DefRC) {
2188           LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2189           return false;
2190         }
2191       }
2192 
2193       I.setDesc(TII.get(TargetOpcode::PHI));
2194 
2195       return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2196     }
2197 
2198     if (I.isCopy())
2199       return selectCopy(I, TII, MRI, TRI, RBI);
2200 
2201     return true;
2202   }
2203 
2205   if (I.getNumOperands() != I.getNumExplicitOperands()) {
2206     LLVM_DEBUG(
2207         dbgs() << "Generic instruction has unexpected implicit operands\n");
2208     return false;
2209   }
2210 
2211   // Try to do some lowering before we start instruction selecting. These
2212   // lowerings are purely transformations on the input G_MIR and so selection
2213   // must continue after any modification of the instruction.
2214   if (preISelLower(I)) {
2215     Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2216   }
2217 
2218   // There may be patterns that the importer can handle, but only by selecting
2219   // them to a suboptimal sequence, in which case our custom C++ selection code
2220   // later never gets a chance to improve on them. Therefore, we attempt an
2221   // early selection here to give priority to certain selection routines over
2222   // the imported ones.
2223   if (earlySelect(I))
2224     return true;
2225 
2226   if (selectImpl(I, *CoverageInfo))
2227     return true;
2228 
2229   LLT Ty =
2230       I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2231 
2232   MachineIRBuilder MIB(I);
2233 
2234   switch (Opcode) {
2235   case TargetOpcode::G_BRCOND:
2236     return selectCompareBranch(I, MF, MRI);
2237 
2238   case TargetOpcode::G_BRINDIRECT: {
2239     I.setDesc(TII.get(AArch64::BR));
2240     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2241   }
2242 
2243   case TargetOpcode::G_BRJT:
2244     return selectBrJT(I, MRI);
2245 
2246   case AArch64::G_ADD_LOW: {
2247     // This op may have been separated from its ADRP companion by the localizer
2248     // or some other code motion pass. Given that many CPUs will try to
2249     // macro fuse these operations anyway, select this into a MOVaddr pseudo
2250     // which will later be expanded into an ADRP+ADD pair after scheduling.
2251     MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2252     if (BaseMI->getOpcode() != AArch64::ADRP) {
2253       I.setDesc(TII.get(AArch64::ADDXri));
2254       I.addOperand(MachineOperand::CreateImm(0));
2255       return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2256     }
2257     assert(TM.getCodeModel() == CodeModel::Small &&
2258            "Expected small code model");
2259     MachineIRBuilder MIB(I);
2260     auto Op1 = BaseMI->getOperand(1);
2261     auto Op2 = I.getOperand(2);
2262     auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2263                        .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2264                                          Op1.getTargetFlags())
2265                        .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2266                                          Op2.getTargetFlags());
2267     I.eraseFromParent();
2268     return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2269   }
2270 
2271   case TargetOpcode::G_BSWAP: {
2272     // Handle vector types for G_BSWAP directly.
2273     Register DstReg = I.getOperand(0).getReg();
2274     LLT DstTy = MRI.getType(DstReg);
2275 
2276     // We should only get vector types here; everything else is handled by the
2277     // importer right now.
2278     if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
2279       LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
2280       return false;
2281     }
2282 
2283     // Only handle 4 and 2 element vectors for now.
2284     // TODO: 16-bit elements.
2285     unsigned NumElts = DstTy.getNumElements();
2286     if (NumElts != 4 && NumElts != 2) {
2287       LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
2288       return false;
2289     }
2290 
2291     // Choose the correct opcode for the supported types. Right now, that's
2292     // v2s32, v4s32, and v2s64.
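         // A byte-level REV32 reverses the bytes within each 32-bit element
         // (and REV64 within each 64-bit element), which is exactly a bswap of
         // those elements.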
2293     unsigned Opc = 0;
2294     unsigned EltSize = DstTy.getElementType().getSizeInBits();
2295     if (EltSize == 32)
2296       Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
2297                                           : AArch64::REV32v16i8;
2298     else if (EltSize == 64)
2299       Opc = AArch64::REV64v16i8;
2300 
2301     // We should always get something by the time we get here...
2302     assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
2303 
2304     I.setDesc(TII.get(Opc));
2305     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2306   }
2307 
2308   case TargetOpcode::G_FCONSTANT:
2309   case TargetOpcode::G_CONSTANT: {
2310     const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2311 
2312     const LLT s8 = LLT::scalar(8);
2313     const LLT s16 = LLT::scalar(16);
2314     const LLT s32 = LLT::scalar(32);
2315     const LLT s64 = LLT::scalar(64);
2316     const LLT s128 = LLT::scalar(128);
2317     const LLT p0 = LLT::pointer(0, 64);
2318 
2319     const Register DefReg = I.getOperand(0).getReg();
2320     const LLT DefTy = MRI.getType(DefReg);
2321     const unsigned DefSize = DefTy.getSizeInBits();
2322     const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2323 
2324     // FIXME: Redundant check, but even less readable when factored out.
2325     if (isFP) {
2326       if (Ty != s32 && Ty != s64 && Ty != s128) {
2327         LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2328                           << " constant, expected: " << s32 << " or " << s64
2329                           << " or " << s128 << '\n');
2330         return false;
2331       }
2332 
2333       if (RB.getID() != AArch64::FPRRegBankID) {
2334         LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2335                           << " constant on bank: " << RB
2336                           << ", expected: FPR\n");
2337         return false;
2338       }
2339 
2340       // The 0.0 case is covered by tablegen for s32 and s64. Reject it here
2341       // so we can be sure tablegen works correctly and isn't rescued by this
2342       // code. For FP128, 0.0 is not covered by tablegen, so we handle that
2343       // case here.
2344       if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2345         return false;
2346     } else {
2347       // s32 and s64 are covered by tablegen.
2348       if (Ty != p0 && Ty != s8 && Ty != s16) {
2349         LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2350                           << " constant, expected: " << s32 << ", " << s64
2351                           << ", or " << p0 << '\n');
2352         return false;
2353       }
2354 
2355       if (RB.getID() != AArch64::GPRRegBankID) {
2356         LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2357                           << " constant on bank: " << RB
2358                           << ", expected: GPR\n");
2359         return false;
2360       }
2361     }
2362 
2363     // We allow G_CONSTANT of types < 32b.
2364     const unsigned MovOpc =
2365         DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2366 
2367     if (isFP) {
2368       // Either emit a FMOV, or emit a copy to emit a normal mov.
2369       const TargetRegisterClass &GPRRC =
2370           DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
2371       const TargetRegisterClass &FPRRC =
2372           DefSize == 32 ? AArch64::FPR32RegClass
2373                         : (DefSize == 64 ? AArch64::FPR64RegClass
2374                                          : AArch64::FPR128RegClass);
2375 
2376       // Can we use a FMOV instruction to represent the immediate?
2377       if (emitFMovForFConstant(I, MRI))
2378         return true;
2379 
2380       // For 64b and 128b values, emit a constant pool load instead.
2381       if (DefSize == 64 || DefSize == 128) {
2382         auto *FPImm = I.getOperand(1).getFPImm();
2383         MachineIRBuilder MIB(I);
2384         auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2385         if (!LoadMI) {
2386           LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2387           return false;
2388         }
2389         MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2390         I.eraseFromParent();
2391         return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2392       }
2393 
2394       // Nope. Emit a copy and use a normal mov instead.
2395       const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC);
2396       MachineOperand &RegOp = I.getOperand(0);
2397       RegOp.setReg(DefGPRReg);
2398       MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2399       MIB.buildCopy({DefReg}, {DefGPRReg});
2400 
2401       if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2402         LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2403         return false;
2404       }
2405 
2406       MachineOperand &ImmOp = I.getOperand(1);
2407       // FIXME: Is going through int64_t always correct?
2408       ImmOp.ChangeToImmediate(
2409           ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2410     } else if (I.getOperand(1).isCImm()) {
2411       uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2412       I.getOperand(1).ChangeToImmediate(Val);
2413     } else if (I.getOperand(1).isImm()) {
2414       uint64_t Val = I.getOperand(1).getImm();
2415       I.getOperand(1).ChangeToImmediate(Val);
2416     }
2417 
2418     I.setDesc(TII.get(MovOpc));
2419     constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2420     return true;
2421   }
2422   case TargetOpcode::G_EXTRACT: {
2423     Register DstReg = I.getOperand(0).getReg();
2424     Register SrcReg = I.getOperand(1).getReg();
2425     LLT SrcTy = MRI.getType(SrcReg);
2426     LLT DstTy = MRI.getType(DstReg);
2427     (void)DstTy;
2428     unsigned SrcSize = SrcTy.getSizeInBits();
2429 
2430     if (SrcTy.getSizeInBits() > 64) {
2431       // This should be an extract of an s128, which is like a vector extract.
2432       if (SrcTy.getSizeInBits() != 128)
2433         return false;
2434       // Only support extracting 64 bits from an s128 at the moment.
2435       if (DstTy.getSizeInBits() != 64)
2436         return false;
2437 
2438       const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2439       const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2440       // Check we have the right regbank always.
2441       assert(SrcRB.getID() == AArch64::FPRRegBankID &&
2442              DstRB.getID() == AArch64::FPRRegBankID &&
2443              "Wrong extract regbank!");
2444       (void)SrcRB;
2445 
2446       // Emit the same code as a vector extract.
2447       // Offset must be a multiple of 64.
2448       unsigned Offset = I.getOperand(2).getImm();
2449       if (Offset % 64 != 0)
2450         return false;
2451       unsigned LaneIdx = Offset / 64;
2452       MachineIRBuilder MIB(I);
2453       MachineInstr *Extract = emitExtractVectorElt(
2454           DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2455       if (!Extract)
2456         return false;
2457       I.eraseFromParent();
2458       return true;
2459     }
2460 
2461     I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2462     MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2463                                       Ty.getSizeInBits() - 1);
2464 
2465     if (SrcSize < 64) {
2466       assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2467              "unexpected G_EXTRACT types");
2468       return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2469     }
2470 
2471     DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2472     MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2473     MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2474         .addReg(DstReg, 0, AArch64::sub_32);
2475     RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2476                                  AArch64::GPR32RegClass, MRI);
2477     I.getOperand(0).setReg(DstReg);
2478 
2479     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2480   }
2481 
2482   case TargetOpcode::G_INSERT: {
2483     LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2484     LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2485     unsigned DstSize = DstTy.getSizeInBits();
2486     // Larger inserts are vectors; same-size ones should be something else by
2487     // now (split up or turned into COPYs).
2488     if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2489       return false;
2490 
2491     I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2492     unsigned LSB = I.getOperand(3).getImm();
2493     unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
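         // This is the BFI alias of BFM: immr = (DstSize - LSB) % DstSize puts
         // the inserted field at bit LSB of the destination, and imms =
         // Width - 1 copies the low Width bits of the source.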
2494     I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2495     MachineInstrBuilder(MF, I).addImm(Width - 1);
2496 
2497     if (DstSize < 64) {
2498       assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2499              "unexpected G_INSERT types");
2500       return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2501     }
2502 
2503     Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2504     BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2505             TII.get(AArch64::SUBREG_TO_REG))
2506         .addDef(SrcReg)
2507         .addImm(0)
2508         .addUse(I.getOperand(2).getReg())
2509         .addImm(AArch64::sub_32);
2510     RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2511                                  AArch64::GPR32RegClass, MRI);
2512     I.getOperand(2).setReg(SrcReg);
2513 
2514     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2515   }
2516   case TargetOpcode::G_FRAME_INDEX: {
2517     // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2518     if (Ty != LLT::pointer(0, 64)) {
2519       LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2520                         << ", expected: " << LLT::pointer(0, 64) << '\n');
2521       return false;
2522     }
2523     I.setDesc(TII.get(AArch64::ADDXri));
2524 
2525     // MOs for a #0 shifted immediate.
2526     I.addOperand(MachineOperand::CreateImm(0));
2527     I.addOperand(MachineOperand::CreateImm(0));
2528 
2529     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2530   }
2531 
2532   case TargetOpcode::G_GLOBAL_VALUE: {
2533     auto GV = I.getOperand(1).getGlobal();
2534     if (GV->isThreadLocal())
2535       return selectTLSGlobalValue(I, MRI);
2536 
2537     unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
2538     if (OpFlags & AArch64II::MO_GOT) {
2539       I.setDesc(TII.get(AArch64::LOADgot));
2540       I.getOperand(1).setTargetFlags(OpFlags);
2541     } else if (TM.getCodeModel() == CodeModel::Large) {
2542       // Materialize the global using movz/movk instructions.
2543       materializeLargeCMVal(I, GV, OpFlags);
2544       I.eraseFromParent();
2545       return true;
2546     } else if (TM.getCodeModel() == CodeModel::Tiny) {
2547       I.setDesc(TII.get(AArch64::ADR));
2548       I.getOperand(1).setTargetFlags(OpFlags);
2549     } else {
2550       I.setDesc(TII.get(AArch64::MOVaddr));
2551       I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2552       MachineInstrBuilder MIB(MF, I);
2553       MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2554                            OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2555     }
2556     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2557   }
2558 
2559   case TargetOpcode::G_ZEXTLOAD:
2560   case TargetOpcode::G_LOAD:
2561   case TargetOpcode::G_STORE: {
2562     bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2563     MachineIRBuilder MIB(I);
2564 
2565     LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
2566 
2567     if (PtrTy != LLT::pointer(0, 64)) {
2568       LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
2569                         << ", expected: " << LLT::pointer(0, 64) << '\n');
2570       return false;
2571     }
2572 
2573     auto &MemOp = **I.memoperands_begin();
2574     uint64_t MemSizeInBytes = MemOp.getSize();
2575     if (MemOp.isAtomic()) {
2576       // For now we just support s8 acquire loads to be able to compile stack
2577       // protector code.
2578       if (MemOp.getOrdering() == AtomicOrdering::Acquire &&
2579           MemSizeInBytes == 1) {
2580         I.setDesc(TII.get(AArch64::LDARB));
2581         return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2582       }
2583       LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n");
2584       return false;
2585     }
2586     unsigned MemSizeInBits = MemSizeInBytes * 8;
2587 
2588 #ifndef NDEBUG
2589     const Register PtrReg = I.getOperand(1).getReg();
2590     const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2591     // Sanity-check the pointer register.
2592     assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2593            "Load/Store pointer operand isn't a GPR");
2594     assert(MRI.getType(PtrReg).isPointer() &&
2595            "Load/Store pointer operand isn't a pointer");
2596 #endif
2597 
2598     const Register ValReg = I.getOperand(0).getReg();
2599     const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2600 
2601     // Helper lambda for partially selecting I. Either returns the original
2602     // instruction with an updated opcode, or a new instruction.
2603     auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
2604       bool IsStore = I.getOpcode() == TargetOpcode::G_STORE;
2605       const unsigned NewOpc =
2606           selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
2607       if (NewOpc == I.getOpcode())
2608         return nullptr;
2609       // Check if we can fold anything into the addressing mode.
2610       auto AddrModeFns =
2611           selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
2612       if (!AddrModeFns) {
2613         // Can't fold anything. Use the original instruction.
2614         I.setDesc(TII.get(NewOpc));
2615         I.addOperand(MachineOperand::CreateImm(0));
2616         return &I;
2617       }
2618 
2619       // Folded something. Create a new instruction and return it.
2620       auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
2621       IsStore ? NewInst.addUse(ValReg) : NewInst.addDef(ValReg);
2622       NewInst.cloneMemRefs(I);
2623       for (auto &Fn : *AddrModeFns)
2624         Fn(NewInst);
2625       I.eraseFromParent();
2626       return &*NewInst;
2627     };
2628 
2629     MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
2630     if (!LoadStore)
2631       return false;
2632 
2633     // If we're storing a 0, use WZR/XZR.
2634     if (Opcode == TargetOpcode::G_STORE) {
2635       auto CVal = getConstantVRegValWithLookThrough(
2636           LoadStore->getOperand(0).getReg(), MRI, /*LookThroughInstrs = */ true,
2637           /*HandleFConstants = */ false);
2638       if (CVal && CVal->Value == 0) {
2639         switch (LoadStore->getOpcode()) {
2640         case AArch64::STRWui:
2641         case AArch64::STRHHui:
2642         case AArch64::STRBBui:
2643           LoadStore->getOperand(0).setReg(AArch64::WZR);
2644           break;
2645         case AArch64::STRXui:
2646           LoadStore->getOperand(0).setReg(AArch64::XZR);
2647           break;
2648         }
2649       }
2650     }
2651 
2652     if (IsZExtLoad) {
2653       // The zextload from a smaller type to i32 should be handled by the
2654       // importer.
2655       if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
2656         return false;
2657       // If we have a ZEXTLOAD then change the load's type to be a narrower reg
2658       // and zero_extend with SUBREG_TO_REG.
2659       Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2660       Register DstReg = LoadStore->getOperand(0).getReg();
2661       LoadStore->getOperand(0).setReg(LdReg);
2662 
2663       MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
2664       MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
2665           .addImm(0)
2666           .addUse(LdReg)
2667           .addImm(AArch64::sub_32);
2668       constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2669       return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
2670                                           MRI);
2671     }
2672     return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2673   }
2674 
2675   case TargetOpcode::G_SMULH:
2676   case TargetOpcode::G_UMULH: {
2677     // Reject the various things we don't support yet.
2678     if (unsupportedBinOp(I, RBI, MRI, TRI))
2679       return false;
2680 
2681     const Register DefReg = I.getOperand(0).getReg();
2682     const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2683 
2684     if (RB.getID() != AArch64::GPRRegBankID) {
2685       LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
2686       return false;
2687     }
2688 
2689     if (Ty != LLT::scalar(64)) {
2690       LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
2691                         << ", expected: " << LLT::scalar(64) << '\n');
2692       return false;
2693     }
2694 
2695     unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
2696                                                              : AArch64::UMULHrr;
2697     I.setDesc(TII.get(NewOpc));
2698 
2699     // Now that we selected an opcode, we need to constrain the register
2700     // operands to use appropriate classes.
2701     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2702   }
2703   case TargetOpcode::G_LSHR:
2704   case TargetOpcode::G_ASHR:
2705     if (MRI.getType(I.getOperand(0).getReg()).isVector())
2706       return selectVectorAshrLshr(I, MRI);
2707     LLVM_FALLTHROUGH;
2708   case TargetOpcode::G_SHL:
2709     if (Opcode == TargetOpcode::G_SHL &&
2710         MRI.getType(I.getOperand(0).getReg()).isVector())
2711       return selectVectorSHL(I, MRI);
2712     LLVM_FALLTHROUGH;
2713   case TargetOpcode::G_FADD:
2714   case TargetOpcode::G_FSUB:
2715   case TargetOpcode::G_FMUL:
2716   case TargetOpcode::G_FDIV:
2717   case TargetOpcode::G_OR: {
2718     // Reject the various things we don't support yet.
2719     if (unsupportedBinOp(I, RBI, MRI, TRI))
2720       return false;
2721 
2722     const unsigned OpSize = Ty.getSizeInBits();
2723 
2724     const Register DefReg = I.getOperand(0).getReg();
2725     const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2726 
2727     const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
2728     if (NewOpc == I.getOpcode())
2729       return false;
2730 
2731     I.setDesc(TII.get(NewOpc));
    // FIXME: Should the type always be reset in setDesc?
2733 
2734     // Now that we selected an opcode, we need to constrain the register
2735     // operands to use appropriate classes.
2736     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2737   }
2738 
2739   case TargetOpcode::G_PTR_ADD: {
2740     MachineIRBuilder MIRBuilder(I);
2741     emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2),
2742             MIRBuilder);
2743     I.eraseFromParent();
2744     return true;
2745   }
2746   case TargetOpcode::G_SADDO:
2747   case TargetOpcode::G_UADDO:
2748   case TargetOpcode::G_SSUBO:
2749   case TargetOpcode::G_USUBO: {
2750     // Emit the operation and get the correct condition code.
2751     MachineIRBuilder MIRBuilder(I);
2752     auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
2753                                   I.getOperand(2), I.getOperand(3), MIRBuilder);
2754 
2755     // Now, put the overflow result in the register given by the first operand
2756     // to the overflow op. CSINC increments the result when the predicate is
2757     // false, so to get the increment when it's true, we need to use the
2758     // inverse. In this case, we want to increment when carry is set.
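    // E.g. for G_UADDO this ends up looking roughly like:
    //   %res:gpr32 = ADDSWrr %lhs, %rhs, implicit-def $nzcv
    //   %carry:gpr32 = CSINCWr $wzr, $wzr, <inverted cond>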
2759     Register ZReg = AArch64::WZR;
2760     auto CsetMI = MIRBuilder
2761                       .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
2762                                   {ZReg, ZReg})
2763                       .addImm(getInvertedCondCode(OpAndCC.second));
2764     constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
2765     I.eraseFromParent();
2766     return true;
2767   }
2768 
2769   case TargetOpcode::G_PTRMASK: {
2770     Register MaskReg = I.getOperand(2).getReg();
2771     Optional<int64_t> MaskVal = getConstantVRegSExtVal(MaskReg, MRI);
2772     // TODO: Implement arbitrary cases
2773     if (!MaskVal || !isShiftedMask_64(*MaskVal))
2774       return false;
2775 
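    // With a shifted-mask constant, the whole G_PTRMASK folds into a single
    // logical-immediate AND, roughly:
    //   %dst = ANDXri %ptr, <encoded logical immediate>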
2776     uint64_t Mask = *MaskVal;
2777     I.setDesc(TII.get(AArch64::ANDXri));
2778     I.getOperand(2).ChangeToImmediate(
2779         AArch64_AM::encodeLogicalImmediate(Mask, 64));
2780 
2781     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2782   }
2783   case TargetOpcode::G_PTRTOINT:
2784   case TargetOpcode::G_TRUNC: {
2785     const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2786     const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
2787 
2788     const Register DstReg = I.getOperand(0).getReg();
2789     const Register SrcReg = I.getOperand(1).getReg();
2790 
2791     const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2792     const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2793 
2794     if (DstRB.getID() != SrcRB.getID()) {
2795       LLVM_DEBUG(
2796           dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
2797       return false;
2798     }
2799 
2800     if (DstRB.getID() == AArch64::GPRRegBankID) {
2801       const TargetRegisterClass *DstRC =
2802           getRegClassForTypeOnBank(DstTy, DstRB, RBI);
2803       if (!DstRC)
2804         return false;
2805 
2806       const TargetRegisterClass *SrcRC =
2807           getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
2808       if (!SrcRC)
2809         return false;
2810 
2811       if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
2812           !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
2813         LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
2814         return false;
2815       }
2816 
2817       if (DstRC == SrcRC) {
2818         // Nothing to be done
2819       } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
2820                  SrcTy == LLT::scalar(64)) {
        llvm_unreachable("TableGen can import this case");
2823       } else if (DstRC == &AArch64::GPR32RegClass &&
2824                  SrcRC == &AArch64::GPR64RegClass) {
2825         I.getOperand(1).setSubReg(AArch64::sub_32);
2826       } else {
2827         LLVM_DEBUG(
2828             dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
2829         return false;
2830       }
2831 
2832       I.setDesc(TII.get(TargetOpcode::COPY));
2833       return true;
2834     } else if (DstRB.getID() == AArch64::FPRRegBankID) {
2835       if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
2836         I.setDesc(TII.get(AArch64::XTNv4i16));
2837         constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2838         return true;
2839       }
2840 
2841       if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
2842         MachineIRBuilder MIB(I);
2843         MachineInstr *Extract = emitExtractVectorElt(
2844             DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
2845         if (!Extract)
2846           return false;
2847         I.eraseFromParent();
2848         return true;
2849       }
2850 
2851       // We might have a vector G_PTRTOINT, in which case just emit a COPY.
2852       if (Opcode == TargetOpcode::G_PTRTOINT) {
2853         assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
2854         I.setDesc(TII.get(TargetOpcode::COPY));
2855         return true;
2856       }
2857     }
2858 
2859     return false;
2860   }
2861 
2862   case TargetOpcode::G_ANYEXT: {
2863     const Register DstReg = I.getOperand(0).getReg();
2864     const Register SrcReg = I.getOperand(1).getReg();
2865 
2866     const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
2867     if (RBDst.getID() != AArch64::GPRRegBankID) {
2868       LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
2869                         << ", expected: GPR\n");
2870       return false;
2871     }
2872 
2873     const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
2874     if (RBSrc.getID() != AArch64::GPRRegBankID) {
2875       LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
2876                         << ", expected: GPR\n");
2877       return false;
2878     }
2879 
2880     const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
2881 
2882     if (DstSize == 0) {
2883       LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
2884       return false;
2885     }
2886 
2887     if (DstSize != 64 && DstSize > 32) {
2888       LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
2889                         << ", expected: 32 or 64\n");
2890       return false;
2891     }
    // At this point G_ANYEXT is just like a plain COPY, but we need to
    // explicitly form the 64-bit value if the destination is 64 bits wide.
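    // E.g. an anyext from s32 to s64 becomes, roughly:
    //   %wide:gpr64all = SUBREG_TO_REG 0, %src, %subreg.sub_32
    //   %dst = COPY %wide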
2894     if (DstSize > 32) {
2895       Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
2896       BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
2897           .addDef(ExtSrc)
2898           .addImm(0)
2899           .addUse(SrcReg)
2900           .addImm(AArch64::sub_32);
2901       I.getOperand(1).setReg(ExtSrc);
2902     }
2903     return selectCopy(I, TII, MRI, TRI, RBI);
2904   }
2905 
2906   case TargetOpcode::G_ZEXT:
2907   case TargetOpcode::G_SEXT_INREG:
2908   case TargetOpcode::G_SEXT: {
2909     unsigned Opcode = I.getOpcode();
2910     const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
2911     const Register DefReg = I.getOperand(0).getReg();
2912     Register SrcReg = I.getOperand(1).getReg();
2913     const LLT DstTy = MRI.getType(DefReg);
2914     const LLT SrcTy = MRI.getType(SrcReg);
2915     unsigned DstSize = DstTy.getSizeInBits();
2916     unsigned SrcSize = SrcTy.getSizeInBits();
2917 
    // G_SEXT_INREG has the same source register size as its destination; the
    // size of the value to be extended is encoded in the immediate.
2920     if (Opcode == TargetOpcode::G_SEXT_INREG)
2921       SrcSize = I.getOperand(2).getImm();
2922 
2923     if (DstTy.isVector())
2924       return false; // Should be handled by imported patterns.
2925 
2926     assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
2927                AArch64::GPRRegBankID &&
2928            "Unexpected ext regbank");
2929 
2930     MachineIRBuilder MIB(I);
2931     MachineInstr *ExtI;
2932 
    // If we're extending the result of a load whose destination type is
    // smaller than 32 bits, then this zext is redundant: GPR32 is the smallest
    // GPR register on AArch64, and all loads smaller than that automatically
    // zero-extend the upper bits. E.g.
2937     // %v(s8) = G_LOAD %p, :: (load 1)
2938     // %v2(s32) = G_ZEXT %v(s8)
2939     if (!IsSigned) {
2940       auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
2941       bool IsGPR =
2942           RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
2943       if (LoadMI && IsGPR) {
2944         const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
2945         unsigned BytesLoaded = MemOp->getSize();
2946         if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
2947           return selectCopy(I, TII, MRI, TRI, RBI);
2948       }
2949 
2950       // If we are zero extending from 32 bits to 64 bits, it's possible that
2951       // the instruction implicitly does the zero extend for us. In that case,
2952       // we can just emit a SUBREG_TO_REG.
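      // E.g., roughly:
      //   %v(s32) = <some instruction isDef32() accepts>
      //   %dst(s64) = SUBREG_TO_REG 0, %v, %subreg.sub_32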
2953       if (IsGPR && SrcSize == 32 && DstSize == 64) {
2954         // Unlike with the G_LOAD case, we don't want to look through copies
2955         // here.
2956         MachineInstr *Def = MRI.getVRegDef(SrcReg);
2957         if (Def && isDef32(*Def)) {
2958           MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
2959               .addImm(0)
2960               .addUse(SrcReg)
2961               .addImm(AArch64::sub_32);
2962 
2963           if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
2964                                             MRI)) {
2965             LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
2966             return false;
2967           }
2968 
2969           if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
2970                                             MRI)) {
2971             LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
2972             return false;
2973           }
2974 
2975           I.eraseFromParent();
2976           return true;
2977         }
2978       }
2979     }
2980 
2981     if (DstSize == 64) {
2982       if (Opcode != TargetOpcode::G_SEXT_INREG) {
2983         // FIXME: Can we avoid manually doing this?
2984         if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
2985                                           MRI)) {
2986           LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
2987                             << " operand\n");
2988           return false;
2989         }
2990         SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
2991                                 {&AArch64::GPR64RegClass}, {})
2992                      .addImm(0)
2993                      .addUse(SrcReg)
2994                      .addImm(AArch64::sub_32)
2995                      .getReg(0);
2996       }
2997 
2998       ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
2999                              {DefReg}, {SrcReg})
3000                   .addImm(0)
3001                   .addImm(SrcSize - 1);
3002     } else if (DstSize <= 32) {
3003       ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3004                              {DefReg}, {SrcReg})
3005                   .addImm(0)
3006                   .addImm(SrcSize - 1);
3007     } else {
3008       return false;
3009     }
3010 
3011     constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
3012     I.eraseFromParent();
3013     return true;
3014   }
3015 
3016   case TargetOpcode::G_SITOFP:
3017   case TargetOpcode::G_UITOFP:
3018   case TargetOpcode::G_FPTOSI:
3019   case TargetOpcode::G_FPTOUI: {
3020     const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
3021               SrcTy = MRI.getType(I.getOperand(1).getReg());
3022     const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
3023     if (NewOpc == Opcode)
3024       return false;
3025 
3026     I.setDesc(TII.get(NewOpc));
3027     constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3028 
3029     return true;
3030   }
3031 
3032   case TargetOpcode::G_FREEZE:
3033     return selectCopy(I, TII, MRI, TRI, RBI);
3034 
3035   case TargetOpcode::G_INTTOPTR:
3036     // The importer is currently unable to import pointer types since they
3037     // didn't exist in SelectionDAG.
3038     return selectCopy(I, TII, MRI, TRI, RBI);
3039 
3040   case TargetOpcode::G_BITCAST:
3041     // Imported SelectionDAG rules can handle every bitcast except those that
3042     // bitcast from a type to the same type. Ideally, these shouldn't occur
3043     // but we might not run an optimizer that deletes them. The other exception
3044     // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3045     // of them.
3046     return selectCopy(I, TII, MRI, TRI, RBI);
3047 
3048   case TargetOpcode::G_SELECT: {
    const LLT CondTy = MRI.getType(I.getOperand(1).getReg());
    if (CondTy != LLT::scalar(1)) {
      LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << CondTy
                        << ", expected: " << LLT::scalar(1) << '\n');
      return false;
    }
3054 
3055     const Register CondReg = I.getOperand(1).getReg();
3056     const Register TReg = I.getOperand(2).getReg();
3057     const Register FReg = I.getOperand(3).getReg();
3058 
3059     if (tryOptSelect(I))
3060       return true;
3061 
3062     // Make sure to use an unused vreg instead of wzr, so that the peephole
3063     // optimizations will be able to optimize these.
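    // The emitted sequence then looks roughly like:
    //   %dead:gpr32 = ANDSWri %cond, <encoding of 1>, implicit-def $nzcv
    //   %dst = CSELWr/CSELXr %t, %f, ne  ; or an optimized form from emitSelect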
3064     MachineIRBuilder MIB(I);
3065     Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3066     auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3067                      .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3068     constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3069     if (!emitSelect(I.getOperand(0).getReg(), TReg, FReg, AArch64CC::NE, MIB))
3070       return false;
3071     I.eraseFromParent();
3072     return true;
3073   }
3074   case TargetOpcode::G_ICMP: {
3075     if (Ty.isVector())
3076       return selectVectorICmp(I, MRI);
3077 
3078     if (Ty != LLT::scalar(32)) {
3079       LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3080                         << ", expected: " << LLT::scalar(32) << '\n');
3081       return false;
3082     }
3083 
3084     MachineIRBuilder MIRBuilder(I);
3085     auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3086     emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
3087                        MIRBuilder);
3088     emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIRBuilder);
3089     I.eraseFromParent();
3090     return true;
3091   }
3092 
3093   case TargetOpcode::G_FCMP: {
3094     MachineIRBuilder MIRBuilder(I);
3095     CmpInst::Predicate Pred =
3096         static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3097     if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(),
3098                        MIRBuilder, Pred) ||
3099         !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIRBuilder))
3100       return false;
3101     I.eraseFromParent();
3102     return true;
3103   }
3104   case TargetOpcode::G_VASTART:
3105     return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3106                                 : selectVaStartAAPCS(I, MF, MRI);
3107   case TargetOpcode::G_INTRINSIC:
3108     return selectIntrinsic(I, MRI);
3109   case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3110     return selectIntrinsicWithSideEffects(I, MRI);
3111   case TargetOpcode::G_IMPLICIT_DEF: {
3112     I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3113     const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3114     const Register DstReg = I.getOperand(0).getReg();
3115     const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3116     const TargetRegisterClass *DstRC =
3117         getRegClassForTypeOnBank(DstTy, DstRB, RBI);
3118     RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3119     return true;
3120   }
3121   case TargetOpcode::G_BLOCK_ADDR: {
3122     if (TM.getCodeModel() == CodeModel::Large) {
3123       materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3124       I.eraseFromParent();
3125       return true;
    } else {
3128       auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3129                            I.getOperand(0).getReg())
3130                        .addBlockAddress(I.getOperand(1).getBlockAddress(),
3131                                         /* Offset */ 0, AArch64II::MO_PAGE)
3132                        .addBlockAddress(
3133                            I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3134                            AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3135       I.eraseFromParent();
3136       return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3137     }
3138   }
3139   case AArch64::G_DUP: {
    // When the scalar of G_DUP is an s8/s16 GPR, it can't be selected by the
    // imported patterns, so do it manually here. Avoiding the s16 GPR case
    // altogether is difficult: at register-bank selection we may end up
    // pessimizing the FPR case if we add an anyextend to fix this. Manual
    // selection is the most robust solution for now.
3145     Register SrcReg = I.getOperand(1).getReg();
3146     if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::GPRRegBankID)
3147       return false; // We expect the fpr regbank case to be imported.
3148     LLT SrcTy = MRI.getType(SrcReg);
3149     if (SrcTy.getSizeInBits() == 16)
3150       I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3151     else if (SrcTy.getSizeInBits() == 8)
3152       I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3153     else
3154       return false;
3155     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3156   }
3157   case TargetOpcode::G_INTRINSIC_TRUNC:
3158     return selectIntrinsicTrunc(I, MRI);
3159   case TargetOpcode::G_INTRINSIC_ROUND:
3160     return selectIntrinsicRound(I, MRI);
3161   case TargetOpcode::G_BUILD_VECTOR:
3162     return selectBuildVector(I, MRI);
3163   case TargetOpcode::G_MERGE_VALUES:
3164     return selectMergeValues(I, MRI);
3165   case TargetOpcode::G_UNMERGE_VALUES:
3166     return selectUnmergeValues(I, MRI);
3167   case TargetOpcode::G_SHUFFLE_VECTOR:
3168     return selectShuffleVector(I, MRI);
3169   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3170     return selectExtractElt(I, MRI);
3171   case TargetOpcode::G_INSERT_VECTOR_ELT:
3172     return selectInsertElt(I, MRI);
3173   case TargetOpcode::G_CONCAT_VECTORS:
3174     return selectConcatVectors(I, MRI);
3175   case TargetOpcode::G_JUMP_TABLE:
3176     return selectJumpTable(I, MRI);
3177   case TargetOpcode::G_VECREDUCE_FADD:
3178   case TargetOpcode::G_VECREDUCE_ADD:
3179     return selectReduction(I, MRI);
3180   }
3181 
3182   return false;
3183 }
3184 
3185 bool AArch64InstructionSelector::selectReduction(
3186     MachineInstr &I, MachineRegisterInfo &MRI) const {
3187   Register VecReg = I.getOperand(1).getReg();
3188   LLT VecTy = MRI.getType(VecReg);
3189   if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
3190     unsigned Opc = 0;
3191     if (VecTy == LLT::vector(16, 8))
3192       Opc = AArch64::ADDVv16i8v;
3193     else if (VecTy == LLT::vector(8, 16))
3194       Opc = AArch64::ADDVv8i16v;
3195     else if (VecTy == LLT::vector(4, 32))
3196       Opc = AArch64::ADDVv4i32v;
3197     else if (VecTy == LLT::vector(2, 64))
3198       Opc = AArch64::ADDPv2i64p;
3199     else {
      LLVM_DEBUG(dbgs() << "Unhandled type for add reduction\n");
3201       return false;
3202     }
3203     I.setDesc(TII.get(Opc));
3204     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3205   }
3206 
3207   if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) {
3208     unsigned Opc = 0;
3209     if (VecTy == LLT::vector(2, 32))
3210       Opc = AArch64::FADDPv2i32p;
3211     else if (VecTy == LLT::vector(2, 64))
3212       Opc = AArch64::FADDPv2i64p;
3213     else {
      LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction\n");
3215       return false;
3216     }
3217     I.setDesc(TII.get(Opc));
3218     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3219   }
3220   return false;
3221 }
3222 
3223 bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3224                                             MachineRegisterInfo &MRI) const {
3225   assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3226   Register JTAddr = I.getOperand(0).getReg();
3227   unsigned JTI = I.getOperand(1).getIndex();
3228   Register Index = I.getOperand(2).getReg();
3229   MachineIRBuilder MIB(I);
3230 
3231   Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3232   Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3233 
3234   MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3235   auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3236                                       {TargetReg, ScratchReg}, {JTAddr, Index})
3237                            .addJumpTableIndex(JTI);
3238   // Build the indirect branch.
3239   MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3240   I.eraseFromParent();
3241   return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3242 }
3243 
3244 bool AArch64InstructionSelector::selectJumpTable(
3245     MachineInstr &I, MachineRegisterInfo &MRI) const {
3246   assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3247   assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3248 
3249   Register DstReg = I.getOperand(0).getReg();
3250   unsigned JTI = I.getOperand(1).getIndex();
3251   // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
3252   MachineIRBuilder MIB(I);
  auto MovMI =
      MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
          .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
          .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3257   I.eraseFromParent();
3258   return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3259 }
3260 
3261 bool AArch64InstructionSelector::selectTLSGlobalValue(
3262     MachineInstr &I, MachineRegisterInfo &MRI) const {
3263   if (!STI.isTargetMachO())
3264     return false;
3265   MachineFunction &MF = *I.getParent()->getParent();
3266   MF.getFrameInfo().setAdjustsStack(true);
3267 
3268   const GlobalValue &GV = *I.getOperand(1).getGlobal();
3269   MachineIRBuilder MIB(I);
3270 
3271   auto LoadGOT =
3272       MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3273           .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3274 
3275   auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3276                              {LoadGOT.getReg(0)})
3277                   .addImm(0);
3278 
3279   MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3280   // TLS calls preserve all registers except those that absolutely must be
3281   // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3282   // silly).
3283   MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
3284       .addUse(AArch64::X0, RegState::Implicit)
3285       .addDef(AArch64::X0, RegState::Implicit)
3286       .addRegMask(TRI.getTLSCallPreservedMask());
3287 
3288   MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3289   RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3290                                MRI);
3291   I.eraseFromParent();
3292   return true;
3293 }
3294 
3295 bool AArch64InstructionSelector::selectIntrinsicTrunc(
3296     MachineInstr &I, MachineRegisterInfo &MRI) const {
3297   const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3298 
3299   // Select the correct opcode.
3300   unsigned Opc = 0;
3301   if (!SrcTy.isVector()) {
3302     switch (SrcTy.getSizeInBits()) {
3303     default:
3304     case 16:
3305       Opc = AArch64::FRINTZHr;
3306       break;
3307     case 32:
3308       Opc = AArch64::FRINTZSr;
3309       break;
3310     case 64:
3311       Opc = AArch64::FRINTZDr;
3312       break;
3313     }
3314   } else {
3315     unsigned NumElts = SrcTy.getNumElements();
3316     switch (SrcTy.getElementType().getSizeInBits()) {
3317     default:
3318       break;
3319     case 16:
3320       if (NumElts == 4)
3321         Opc = AArch64::FRINTZv4f16;
3322       else if (NumElts == 8)
3323         Opc = AArch64::FRINTZv8f16;
3324       break;
3325     case 32:
3326       if (NumElts == 2)
3327         Opc = AArch64::FRINTZv2f32;
3328       else if (NumElts == 4)
3329         Opc = AArch64::FRINTZv4f32;
3330       break;
3331     case 64:
3332       if (NumElts == 2)
3333         Opc = AArch64::FRINTZv2f64;
3334       break;
3335     }
3336   }
3337 
3338   if (!Opc) {
3339     // Didn't get an opcode above, bail.
3340     LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
3341     return false;
3342   }
3343 
3344   // Legalization would have set us up perfectly for this; we just need to
3345   // set the opcode and move on.
3346   I.setDesc(TII.get(Opc));
3347   return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3348 }
3349 
3350 bool AArch64InstructionSelector::selectIntrinsicRound(
3351     MachineInstr &I, MachineRegisterInfo &MRI) const {
3352   const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3353 
3354   // Select the correct opcode.
3355   unsigned Opc = 0;
3356   if (!SrcTy.isVector()) {
3357     switch (SrcTy.getSizeInBits()) {
3358     default:
3359     case 16:
3360       Opc = AArch64::FRINTAHr;
3361       break;
3362     case 32:
3363       Opc = AArch64::FRINTASr;
3364       break;
3365     case 64:
3366       Opc = AArch64::FRINTADr;
3367       break;
3368     }
3369   } else {
3370     unsigned NumElts = SrcTy.getNumElements();
3371     switch (SrcTy.getElementType().getSizeInBits()) {
3372     default:
3373       break;
3374     case 16:
3375       if (NumElts == 4)
3376         Opc = AArch64::FRINTAv4f16;
3377       else if (NumElts == 8)
3378         Opc = AArch64::FRINTAv8f16;
3379       break;
3380     case 32:
3381       if (NumElts == 2)
3382         Opc = AArch64::FRINTAv2f32;
3383       else if (NumElts == 4)
3384         Opc = AArch64::FRINTAv4f32;
3385       break;
3386     case 64:
3387       if (NumElts == 2)
3388         Opc = AArch64::FRINTAv2f64;
3389       break;
3390     }
3391   }
3392 
3393   if (!Opc) {
3394     // Didn't get an opcode above, bail.
3395     LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
3396     return false;
3397   }
3398 
3399   // Legalization would have set us up perfectly for this; we just need to
3400   // set the opcode and move on.
3401   I.setDesc(TII.get(Opc));
3402   return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3403 }
3404 
3405 bool AArch64InstructionSelector::selectVectorICmp(
3406     MachineInstr &I, MachineRegisterInfo &MRI) const {
3407   Register DstReg = I.getOperand(0).getReg();
3408   LLT DstTy = MRI.getType(DstReg);
3409   Register SrcReg = I.getOperand(2).getReg();
3410   Register Src2Reg = I.getOperand(3).getReg();
3411   LLT SrcTy = MRI.getType(SrcReg);
3412 
3413   unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
3414   unsigned NumElts = DstTy.getNumElements();
3415 
3416   // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
3417   // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
3418   // Third index is cc opcode:
3419   // 0 == eq
3420   // 1 == ugt
3421   // 2 == uge
3422   // 3 == ult
3423   // 4 == ule
3424   // 5 == sgt
3425   // 6 == sge
3426   // 7 == slt
3427   // 8 == sle
3428   // ne is done by negating 'eq' result.
3429 
3430   // This table below assumes that for some comparisons the operands will be
3431   // commuted.
3432   // ult op == commute + ugt op
3433   // ule op == commute + uge op
3434   // slt op == commute + sgt op
3435   // sle op == commute + sge op
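  // E.g. "icmp ult a, b" is selected as "CMHI b, a" (i.e. "b ugt a"), with the
  // operand swap handled below via SwapOperands.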
3436   unsigned PredIdx = 0;
3437   bool SwapOperands = false;
3438   CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
3439   switch (Pred) {
3440   case CmpInst::ICMP_NE:
3441   case CmpInst::ICMP_EQ:
3442     PredIdx = 0;
3443     break;
3444   case CmpInst::ICMP_UGT:
3445     PredIdx = 1;
3446     break;
3447   case CmpInst::ICMP_UGE:
3448     PredIdx = 2;
3449     break;
3450   case CmpInst::ICMP_ULT:
3451     PredIdx = 3;
3452     SwapOperands = true;
3453     break;
3454   case CmpInst::ICMP_ULE:
3455     PredIdx = 4;
3456     SwapOperands = true;
3457     break;
3458   case CmpInst::ICMP_SGT:
3459     PredIdx = 5;
3460     break;
3461   case CmpInst::ICMP_SGE:
3462     PredIdx = 6;
3463     break;
3464   case CmpInst::ICMP_SLT:
3465     PredIdx = 7;
3466     SwapOperands = true;
3467     break;
3468   case CmpInst::ICMP_SLE:
3469     PredIdx = 8;
3470     SwapOperands = true;
3471     break;
3472   default:
3473     llvm_unreachable("Unhandled icmp predicate");
3474     return false;
3475   }
3476 
3477   // This table obviously should be tablegen'd when we have our GISel native
3478   // tablegen selector.
3479 
3480   static const unsigned OpcTable[4][4][9] = {
3481       {
3482           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3483            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3484            0 /* invalid */},
3485           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3486            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3487            0 /* invalid */},
3488           {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
3489            AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
3490            AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
3491           {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
3492            AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
3493            AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
3494       },
3495       {
3496           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3497            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3498            0 /* invalid */},
3499           {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
3500            AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
3501            AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
3502           {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
3503            AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
3504            AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
3505           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3506            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3507            0 /* invalid */}
3508       },
3509       {
3510           {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
3511            AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
3512            AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
3513           {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
3514            AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
3515            AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
3516           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3517            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3518            0 /* invalid */},
3519           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3520            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3521            0 /* invalid */}
3522       },
3523       {
3524           {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
3525            AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
3526            AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
3527           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3528            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3529            0 /* invalid */},
3530           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3531            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3532            0 /* invalid */},
3533           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3534            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3535            0 /* invalid */}
3536       },
3537   };
3538   unsigned EltIdx = Log2_32(SrcEltSize / 8);
3539   unsigned NumEltsIdx = Log2_32(NumElts / 2);
3540   unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
3541   if (!Opc) {
    LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode\n");
3543     return false;
3544   }
3545 
3546   const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3547   const TargetRegisterClass *SrcRC =
3548       getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
3549   if (!SrcRC) {
3550     LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3551     return false;
3552   }
3553 
3554   unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
3555   if (SrcTy.getSizeInBits() == 128)
3556     NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
3557 
3558   if (SwapOperands)
3559     std::swap(SrcReg, Src2Reg);
3560 
3561   MachineIRBuilder MIB(I);
3562   auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
3563   constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3564 
3565   // Invert if we had a 'ne' cc.
3566   if (NotOpc) {
3567     Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
3568     constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3569   } else {
3570     MIB.buildCopy(DstReg, Cmp.getReg(0));
3571   }
3572   RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
3573   I.eraseFromParent();
3574   return true;
3575 }
3576 
3577 MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3578     unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3579     MachineIRBuilder &MIRBuilder) const {
3580   auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3581 
3582   auto BuildFn = [&](unsigned SubregIndex) {
3583     auto Ins =
3584         MIRBuilder
3585             .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3586             .addImm(SubregIndex);
3587     constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3588     constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3589     return &*Ins;
3590   };
3591 
3592   switch (EltSize) {
3593   case 16:
3594     return BuildFn(AArch64::hsub);
3595   case 32:
3596     return BuildFn(AArch64::ssub);
3597   case 64:
3598     return BuildFn(AArch64::dsub);
3599   default:
3600     return nullptr;
3601   }
3602 }
3603 
3604 bool AArch64InstructionSelector::selectMergeValues(
3605     MachineInstr &I, MachineRegisterInfo &MRI) const {
3606   assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3607   const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3608   const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3609   assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3610   const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3611 
3612   if (I.getNumOperands() != 3)
3613     return false;
3614 
3615   // Merging 2 s64s into an s128.
3616   if (DstTy == LLT::scalar(128)) {
3617     if (SrcTy.getSizeInBits() != 64)
3618       return false;
3619     MachineIRBuilder MIB(I);
3620     Register DstReg = I.getOperand(0).getReg();
3621     Register Src1Reg = I.getOperand(1).getReg();
3622     Register Src2Reg = I.getOperand(2).getReg();
3623     auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3624     MachineInstr *InsMI =
3625         emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
3626     if (!InsMI)
3627       return false;
3628     MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3629                                           Src2Reg, /* LaneIdx */ 1, RB, MIB);
3630     if (!Ins2MI)
3631       return false;
3632     constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3633     constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
3634     I.eraseFromParent();
3635     return true;
3636   }
3637 
3638   if (RB.getID() != AArch64::GPRRegBankID)
3639     return false;
3640 
3641   if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3642     return false;
3643 
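  // Merge two s32s into an s64: widen both halves with SUBREG_TO_REG, then
  // insert the high half with BFM. Roughly:
  //   %lo64 = SUBREG_TO_REG 0, %lo, %subreg.sub_32
  //   %hi64 = SUBREG_TO_REG 0, %hi, %subreg.sub_32
  //   %dst  = BFMXri %lo64, %hi64, 32, 31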
3644   auto *DstRC = &AArch64::GPR64RegClass;
3645   Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3646   MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3647                                     TII.get(TargetOpcode::SUBREG_TO_REG))
3648                                 .addDef(SubToRegDef)
3649                                 .addImm(0)
3650                                 .addUse(I.getOperand(1).getReg())
3651                                 .addImm(AArch64::sub_32);
3652   Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
  // We need to anyext the second scalar before we can use BFM.
  MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                     TII.get(TargetOpcode::SUBREG_TO_REG))
                                 .addDef(SubToRegDef2)
                                 .addImm(0)
                                 .addUse(I.getOperand(2).getReg())
                                 .addImm(AArch64::sub_32);
3660   MachineInstr &BFM =
3661       *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3662            .addDef(I.getOperand(0).getReg())
3663            .addUse(SubToRegDef)
3664            .addUse(SubToRegDef2)
3665            .addImm(32)
3666            .addImm(31);
3667   constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3668   constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3669   constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
3670   I.eraseFromParent();
3671   return true;
3672 }
3673 
3674 static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3675                               const unsigned EltSize) {
3676   // Choose a lane copy opcode and subregister based off of the size of the
3677   // vector's elements.
3678   switch (EltSize) {
3679   case 16:
3680     CopyOpc = AArch64::CPYi16;
3681     ExtractSubReg = AArch64::hsub;
3682     break;
3683   case 32:
3684     CopyOpc = AArch64::CPYi32;
3685     ExtractSubReg = AArch64::ssub;
3686     break;
3687   case 64:
3688     CopyOpc = AArch64::CPYi64;
3689     ExtractSubReg = AArch64::dsub;
3690     break;
3691   default:
3692     // Unknown size, bail out.
3693     LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
3694     return false;
3695   }
3696   return true;
3697 }
3698 
3699 MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3700     Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
3701     Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3702   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3703   unsigned CopyOpc = 0;
3704   unsigned ExtractSubReg = 0;
3705   if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
3706     LLVM_DEBUG(
3707         dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
3708     return nullptr;
3709   }
3710 
3711   const TargetRegisterClass *DstRC =
3712       getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
3713   if (!DstRC) {
3714     LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
3715     return nullptr;
3716   }
3717 
3718   const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
3719   const LLT &VecTy = MRI.getType(VecReg);
3720   const TargetRegisterClass *VecRC =
3721       getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
3722   if (!VecRC) {
3723     LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3724     return nullptr;
3725   }
3726 
3727   // The register that we're going to copy into.
3728   Register InsertReg = VecReg;
3729   if (!DstReg)
3730     DstReg = MRI.createVirtualRegister(DstRC);
3731   // If the lane index is 0, we just use a subregister COPY.
3732   if (LaneIdx == 0) {
3733     auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
3734                     .addReg(VecReg, 0, ExtractSubReg);
3735     RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3736     return &*Copy;
3737   }
3738 
3739   // Lane copies require 128-bit wide registers. If we're dealing with an
3740   // unpacked vector, then we need to move up to that width. Insert an implicit
3741   // def and a subregister insert to get us there.
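  // E.g. extracting lane 1 of a 64-bit v2s32 ends up looking roughly like:
  //   %undef:fpr128 = IMPLICIT_DEF
  //   %full:fpr128 = INSERT_SUBREG %undef, %vec, %subreg.dsub
  //   %dst:fpr32 = CPYi32 %full, 1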
3742   if (VecTy.getSizeInBits() != 128) {
3743     MachineInstr *ScalarToVector = emitScalarToVector(
3744         VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
3745     if (!ScalarToVector)
3746       return nullptr;
3747     InsertReg = ScalarToVector->getOperand(0).getReg();
3748   }
3749 
3750   MachineInstr *LaneCopyMI =
3751       MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
3752   constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
3753 
3754   // Make sure that we actually constrain the initial copy.
3755   RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3756   return LaneCopyMI;
3757 }
3758 
3759 bool AArch64InstructionSelector::selectExtractElt(
3760     MachineInstr &I, MachineRegisterInfo &MRI) const {
3761   assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
3762          "unexpected opcode!");
3763   Register DstReg = I.getOperand(0).getReg();
3764   const LLT NarrowTy = MRI.getType(DstReg);
3765   const Register SrcReg = I.getOperand(1).getReg();
3766   const LLT WideTy = MRI.getType(SrcReg);
3767   (void)WideTy;
3768   assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
3769          "source register size too small!");
3770   assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
3771 
3772   // Need the lane index to determine the correct copy opcode.
3773   MachineOperand &LaneIdxOp = I.getOperand(2);
3774   assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
3775 
3776   if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
3777     LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
3778     return false;
3779   }
3780 
3781   // Find the index to extract from.
3782   auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
3783   if (!VRegAndVal)
3784     return false;
3785   unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
3786 
3787   MachineIRBuilder MIRBuilder(I);
3788 
3789   const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3790   MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
3791                                                LaneIdx, MIRBuilder);
3792   if (!Extract)
3793     return false;
3794 
3795   I.eraseFromParent();
3796   return true;
3797 }
3798 
3799 bool AArch64InstructionSelector::selectSplitVectorUnmerge(
3800     MachineInstr &I, MachineRegisterInfo &MRI) const {
3801   unsigned NumElts = I.getNumOperands() - 1;
3802   Register SrcReg = I.getOperand(NumElts).getReg();
3803   const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
3804   const LLT SrcTy = MRI.getType(SrcReg);
3805 
3806   assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
3807   if (SrcTy.getSizeInBits() > 128) {
    LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge\n");
3809     return false;
3810   }
3811 
3812   MachineIRBuilder MIB(I);
3813 
3814   // We implement a split vector operation by treating the sub-vectors as
3815   // scalars and extracting them.
3816   const RegisterBank &DstRB =
3817       *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
3818   for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
3819     Register Dst = I.getOperand(OpIdx).getReg();
3820     MachineInstr *Extract =
3821         emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
3822     if (!Extract)
3823       return false;
3824   }
3825   I.eraseFromParent();
3826   return true;
3827 }
3828 
3829 bool AArch64InstructionSelector::selectUnmergeValues(
3830     MachineInstr &I, MachineRegisterInfo &MRI) const {
3831   assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
3832          "unexpected opcode");
3833 
3834   // TODO: Handle unmerging into GPRs and from scalars to scalars.
3835   if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
3836           AArch64::FPRRegBankID ||
3837       RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3838           AArch64::FPRRegBankID) {
3839     LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
3840                          "currently unsupported.\n");
3841     return false;
3842   }
3843 
3844   // The last operand is the vector source register, and every other operand is
3845   // a register to unpack into.
3846   unsigned NumElts = I.getNumOperands() - 1;
3847   Register SrcReg = I.getOperand(NumElts).getReg();
3848   const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
3849   const LLT WideTy = MRI.getType(SrcReg);
3850   (void)WideTy;
3851   assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
3852          "can only unmerge from vector or s128 types!");
3853   assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
3854          "source register size too small!");
3855 
3856   if (!NarrowTy.isScalar())
3857     return selectSplitVectorUnmerge(I, MRI);
3858 
3859   MachineIRBuilder MIB(I);
3860 
3861   // Choose a lane copy opcode and subregister based off of the size of the
3862   // vector's elements.
3863   unsigned CopyOpc = 0;
3864   unsigned ExtractSubReg = 0;
3865   if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
3866     return false;
3867 
3868   // Set up for the lane copies.
3869   MachineBasicBlock &MBB = *I.getParent();
3870 
3871   // Stores the registers we'll be copying from.
3872   SmallVector<Register, 4> InsertRegs;
3873 
3874   // We'll use the first register twice, so we only need NumElts-1 registers.
3875   unsigned NumInsertRegs = NumElts - 1;
3876 
3877   // If our elements fit into exactly 128 bits, then we can copy from the source
3878   // directly. Otherwise, we need to do a bit of setup with some subregister
3879   // inserts.
3880   if (NarrowTy.getSizeInBits() * NumElts == 128) {
3881     InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
3882   } else {
    // Otherwise, we have to perform subregister inserts: for each insert,
    // create an implicit def and a subregister insert, and save the register
    // we create.
3885     for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
3886       Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
3887       MachineInstr &ImpDefMI =
3888           *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
3889                    ImpDefReg);
3890 
3891       // Now, create the subregister insert from SrcReg.
3892       Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
3893       MachineInstr &InsMI =
3894           *BuildMI(MBB, I, I.getDebugLoc(),
3895                    TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
3896                .addUse(ImpDefReg)
3897                .addUse(SrcReg)
3898                .addImm(AArch64::dsub);
3899 
3900       constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
3901       constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
3902 
3903       // Save the register so that we can copy from it after.
3904       InsertRegs.push_back(InsertReg);
3905     }
3906   }
3907 
3908   // Now that we've created any necessary subregister inserts, we can
3909   // create the copies.
3910   //
3911   // Perform the first copy separately as a subregister copy.
3912   Register CopyTo = I.getOperand(0).getReg();
3913   auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
3914                        .addReg(InsertRegs[0], 0, ExtractSubReg);
3915   constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
3916 
3917   // Now, perform the remaining copies as vector lane copies.
3918   unsigned LaneIdx = 1;
3919   for (Register InsReg : InsertRegs) {
3920     Register CopyTo = I.getOperand(LaneIdx).getReg();
3921     MachineInstr &CopyInst =
3922         *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
3923              .addUse(InsReg)
3924              .addImm(LaneIdx);
3925     constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
3926     ++LaneIdx;
3927   }
3928 
3929   // Separately constrain the first copy's destination. Because of the
3930   // limitation in constrainOperandRegClass, we can't guarantee that this will
3931   // actually be constrained. So, do it ourselves using the second operand.
3932   const TargetRegisterClass *RC =
3933       MRI.getRegClassOrNull(I.getOperand(1).getReg());
3934   if (!RC) {
3935     LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
3936     return false;
3937   }
3938 
3939   RBI.constrainGenericRegister(CopyTo, *RC, MRI);
3940   I.eraseFromParent();
3941   return true;
3942 }
3943 
3944 bool AArch64InstructionSelector::selectConcatVectors(
3945     MachineInstr &I, MachineRegisterInfo &MRI) const {
3946   assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
3947          "Unexpected opcode");
3948   Register Dst = I.getOperand(0).getReg();
3949   Register Op1 = I.getOperand(1).getReg();
3950   Register Op2 = I.getOperand(2).getReg();
3951   MachineIRBuilder MIRBuilder(I);
3952   MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
3953   if (!ConcatMI)
3954     return false;
3955   I.eraseFromParent();
3956   return true;
3957 }
3958 
3959 unsigned
3960 AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
3961                                                   MachineFunction &MF) const {
3962   Type *CPTy = CPVal->getType();
3963   Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
3964 
3965   MachineConstantPool *MCP = MF.getConstantPool();
3966   return MCP->getConstantPoolIndex(CPVal, Alignment);
3967 }
3968 
3969 MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
3970     const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
3971   unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());
3972 
3973   auto Adrp =
3974       MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
3975           .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
3976 
3977   MachineInstr *LoadMI = nullptr;
3978   switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
3979   case 16:
3980     LoadMI =
3981         &*MIRBuilder
3982               .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
3983               .addConstantPoolIndex(CPIdx, 0,
3984                                     AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3985     break;
3986   case 8:
3987     LoadMI = &*MIRBuilder
3988                  .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
3989                  .addConstantPoolIndex(
3990                      CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3991     break;
3992   default:
3993     LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
3994                       << *CPVal->getType());
3995     return nullptr;
3996   }
3997   constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
3998   constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
3999   return LoadMI;
4000 }
4001 
/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
/// size and RB.
4004 static std::pair<unsigned, unsigned>
4005 getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4006   unsigned Opc, SubregIdx;
4007   if (RB.getID() == AArch64::GPRRegBankID) {
4008     if (EltSize == 16) {
4009       Opc = AArch64::INSvi16gpr;
4010       SubregIdx = AArch64::ssub;
4011     } else if (EltSize == 32) {
4012       Opc = AArch64::INSvi32gpr;
4013       SubregIdx = AArch64::ssub;
4014     } else if (EltSize == 64) {
4015       Opc = AArch64::INSvi64gpr;
4016       SubregIdx = AArch64::dsub;
4017     } else {
4018       llvm_unreachable("invalid elt size!");
4019     }
4020   } else {
4021     if (EltSize == 8) {
4022       Opc = AArch64::INSvi8lane;
4023       SubregIdx = AArch64::bsub;
4024     } else if (EltSize == 16) {
4025       Opc = AArch64::INSvi16lane;
4026       SubregIdx = AArch64::hsub;
4027     } else if (EltSize == 32) {
4028       Opc = AArch64::INSvi32lane;
4029       SubregIdx = AArch64::ssub;
4030     } else if (EltSize == 64) {
4031       Opc = AArch64::INSvi64lane;
4032       SubregIdx = AArch64::dsub;
4033     } else {
4034       llvm_unreachable("invalid elt size!");
4035     }
4036   }
4037   return std::make_pair(Opc, SubregIdx);
4038 }
4039 
4040 MachineInstr *AArch64InstructionSelector::emitInstr(
4041     unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4042     std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4043     const ComplexRendererFns &RenderFns) const {
4044   assert(Opcode && "Expected an opcode?");
4045   assert(!isPreISelGenericOpcode(Opcode) &&
4046          "Function should only be used to produce selected instructions!");
4047   auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4048   if (RenderFns)
4049     for (auto &Fn : *RenderFns)
4050       Fn(MI);
4051   constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4052   return &*MI;
4053 }
4054 
4055 MachineInstr *AArch64InstructionSelector::emitAddSub(
4056     const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4057     Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4058     MachineIRBuilder &MIRBuilder) const {
4059   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4060   assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4061   auto Ty = MRI.getType(LHS.getReg());
4062   assert(!Ty.isVector() && "Expected a scalar or pointer?");
4063   unsigned Size = Ty.getSizeInBits();
4064   assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4065   bool Is32Bit = Size == 32;
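  // The rows of AddrModeAndSizeToOpcode are, in order: ri (positive
  // immediate), rs (shifted register), rr (plain register-register), ri with
  // the negated opcode (negative immediate), and rx (extended register).
  // Column 0 holds the 64-bit opcode and column 1 the 32-bit one.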
4066 
4067   // INSTRri form with positive arithmetic immediate.
4068   if (auto Fns = selectArithImmed(RHS))
4069     return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4070                      MIRBuilder, Fns);
4071 
4072   // INSTRri form with negative arithmetic immediate.
4073   if (auto Fns = selectNegArithImmed(RHS))
4074     return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4075                      MIRBuilder, Fns);
4076 
4077   // INSTRrx form.
4078   if (auto Fns = selectArithExtendedRegister(RHS))
4079     return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4080                      MIRBuilder, Fns);
4081 
4082   // INSTRrs form.
4083   if (auto Fns = selectShiftedRegister(RHS))
4084     return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4085                      MIRBuilder, Fns);
4086   return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4087                    MIRBuilder);
4088 }
4089 
4090 MachineInstr *
4091 AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4092                                     MachineOperand &RHS,
4093                                     MachineIRBuilder &MIRBuilder) const {
4094   const std::array<std::array<unsigned, 2>, 5> OpcTable{
4095       {{AArch64::ADDXri, AArch64::ADDWri},
4096        {AArch64::ADDXrs, AArch64::ADDWrs},
4097        {AArch64::ADDXrr, AArch64::ADDWrr},
4098        {AArch64::SUBXri, AArch64::SUBWri},
4099        {AArch64::ADDXrx, AArch64::ADDWrx}}};
4100   return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4101 }
4102 
4103 MachineInstr *
4104 AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4105                                      MachineOperand &RHS,
4106                                      MachineIRBuilder &MIRBuilder) const {
4107   const std::array<std::array<unsigned, 2>, 5> OpcTable{
4108       {{AArch64::ADDSXri, AArch64::ADDSWri},
4109        {AArch64::ADDSXrs, AArch64::ADDSWrs},
4110        {AArch64::ADDSXrr, AArch64::ADDSWrr},
4111        {AArch64::SUBSXri, AArch64::SUBSWri},
4112        {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4113   return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4114 }
4115 
4116 MachineInstr *
4117 AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4118                                      MachineOperand &RHS,
4119                                      MachineIRBuilder &MIRBuilder) const {
4120   const std::array<std::array<unsigned, 2>, 5> OpcTable{
4121       {{AArch64::SUBSXri, AArch64::SUBSWri},
4122        {AArch64::SUBSXrs, AArch64::SUBSWrs},
4123        {AArch64::SUBSXrr, AArch64::SUBSWrr},
4124        {AArch64::ADDSXri, AArch64::ADDSWri},
4125        {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4126   return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4127 }
4128 
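/// Emit a CMN (compare negative): an ADDS into a throwaway virtual register,
/// used only for its effect on NZCV.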
4129 MachineInstr *
4130 AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4131                                     MachineIRBuilder &MIRBuilder) const {
4132   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4133   bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4134   auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4135   return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4136 }
4137 
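/// Emit a TST: an ANDS used only for its effect on NZCV. Prefers the
/// immediate form when the RHS is a logical immediate, then the
/// shifted-register form, and falls back to the register-register form.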
4138 MachineInstr *
4139 AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4140                                     MachineIRBuilder &MIRBuilder) const {
4141   assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4142   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4143   LLT Ty = MRI.getType(LHS.getReg());
4144   unsigned RegSize = Ty.getSizeInBits();
4145   bool Is32Bit = (RegSize == 32);
4146   const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4147                                    {AArch64::ANDSXrs, AArch64::ANDSWrs},
4148                                    {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4149   // ANDS needs a logical immediate for its immediate form. Check if we can
4150   // fold one in.
4151   if (auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4152     int64_t Imm = ValAndVReg->Value.getSExtValue();
4153 
4154     if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
4155       auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4156       TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
4157       constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4158       return &*TstMI;
4159     }
4160   }
4161 
4162   if (auto Fns = selectLogicalShiftedRegister(RHS))
4163     return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4164   return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4165 }
4166 
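/// Emit the flag-setting compare for a G_ICMP. Tries to fold the compare into
/// a CMN or TST first; otherwise emits a SUBS against a fresh destination
/// register.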
4167 MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4168     MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4169     MachineIRBuilder &MIRBuilder) const {
4170   assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4171   assert(Predicate.isPredicate() && "Expected predicate?");
4172   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4173   LLT CmpTy = MRI.getType(LHS.getReg());
4174   assert(!CmpTy.isVector() && "Expected scalar or pointer");
4175   unsigned Size = CmpTy.getSizeInBits();
4176   (void)Size;
4177   assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4178   // Fold the compare into a cmn or tst if possible.
4179   if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4180     return FoldCmp;
4181   auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4182   return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4183 }
4184 
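/// Materialize the boolean result of a floating point compare into \p Dst.
/// Predicates that require two condition codes are handled by emitting two
/// CSINCs and ORRing the results together.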
4185 MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4186     Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4187   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4188 #ifndef NDEBUG
4189   LLT Ty = MRI.getType(Dst);
4190   assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4191          "Expected a 32-bit scalar register?");
4192 #endif
4193   const Register ZeroReg = AArch64::WZR;
4194   auto EmitCSet = [&](Register CsetDst, AArch64CC::CondCode CC) {
4195     auto CSet =
4196         MIRBuilder.buildInstr(AArch64::CSINCWr, {CsetDst}, {ZeroReg, ZeroReg})
4197             .addImm(getInvertedCondCode(CC));
4198     constrainSelectedInstRegOperands(*CSet, TII, TRI, RBI);
4199     return &*CSet;
4200   };
4201 
4202   AArch64CC::CondCode CC1, CC2;
4203   changeFCMPPredToAArch64CC(Pred, CC1, CC2);
4204   if (CC2 == AArch64CC::AL)
4205     return EmitCSet(Dst, CC1);
4206 
4207   const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4208   Register Def1Reg = MRI.createVirtualRegister(RC);
4209   Register Def2Reg = MRI.createVirtualRegister(RC);
4210   EmitCSet(Def1Reg, CC1);
4211   EmitCSet(Def2Reg, CC2);
4212   auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4213   constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4214   return &*OrMI;
4215 }
4216 
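/// Emit an FCMP for the given operands, comparing directly against #0.0 when
/// the RHS (or, for equality predicates, the LHS) is a positive-zero
/// constant. Returns nullptr for unsupported (vector or oddly sized) types.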
4217 MachineInstr *
4218 AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS,
4219                                           MachineIRBuilder &MIRBuilder,
4220                                           Optional<CmpInst::Predicate> Pred) const {
4221   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4222   LLT Ty = MRI.getType(LHS);
4223   if (Ty.isVector())
4224     return nullptr;
4225   unsigned OpSize = Ty.getSizeInBits();
4226   if (OpSize != 32 && OpSize != 64)
4227     return nullptr;
4228 
4229   // If this is a compare against +0.0, then we don't have
4230   // to explicitly materialize a constant.
4231   const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4232   bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4233 
4234   auto IsEqualityPred = [](CmpInst::Predicate P) {
4235     return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4236            P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4237   };
4238   if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4239     // Try commutating the operands.
4240     const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4241     if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4242       ShouldUseImm = true;
4243       std::swap(LHS, RHS);
4244     }
4245   }
4246   unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
4247                               {AArch64::FCMPSri, AArch64::FCMPDri}};
4248   unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64];
4249 
  // Partially build the compare. Decide if we need to add a use for the
  // third operand based on whether we're comparing against 0.0.
4252   auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4253   if (!ShouldUseImm)
4254     CmpMI.addUse(RHS);
4255   constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4256   return &*CmpMI;
4257 }
4258 
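/// Concatenate two 64-bit vectors into a 128-bit result. Returns nullptr if
/// the operand types differ or are not 64 bits wide. If \p Dst is not
/// provided, a new virtual register is created for the result.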
4259 MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4260     Optional<Register> Dst, Register Op1, Register Op2,
4261     MachineIRBuilder &MIRBuilder) const {
4262   // We implement a vector concat by:
4263   // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4264   // 2. Insert the upper vector into the destination's upper element
4265   // TODO: some of this code is common with G_BUILD_VECTOR handling.
4266   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4267 
4268   const LLT Op1Ty = MRI.getType(Op1);
4269   const LLT Op2Ty = MRI.getType(Op2);
4270 
4271   if (Op1Ty != Op2Ty) {
4272     LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4273     return nullptr;
4274   }
4275   assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4276 
4277   if (Op1Ty.getSizeInBits() >= 128) {
4278     LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4279     return nullptr;
4280   }
4281 
4282   // At the moment we just support 64 bit vector concats.
4283   if (Op1Ty.getSizeInBits() != 64) {
4284     LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors");
4285     return nullptr;
4286   }
4287 
4288   const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4289   const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4290   const TargetRegisterClass *DstRC =
4291       getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
4292 
4293   MachineInstr *WidenedOp1 =
4294       emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4295   MachineInstr *WidenedOp2 =
4296       emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4297   if (!WidenedOp1 || !WidenedOp2) {
4298     LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4299     return nullptr;
4300   }
4301 
4302   // Now do the insert of the upper element.
4303   unsigned InsertOpc, InsSubRegIdx;
4304   std::tie(InsertOpc, InsSubRegIdx) =
4305       getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4306 
4307   if (!Dst)
4308     Dst = MRI.createVirtualRegister(DstRC);
4309   auto InsElt =
4310       MIRBuilder
4311           .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4312           .addImm(1) /* Lane index */
4313           .addUse(WidenedOp2->getOperand(0).getReg())
4314           .addImm(0);
4315   constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4316   return &*InsElt;
4317 }
4318 
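/// Try to select a G_FCONSTANT as a single FMOV immediate. Returns nullptr if
/// the value is zero or cannot be encoded as an FMOV immediate.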
4319 MachineInstr *AArch64InstructionSelector::emitFMovForFConstant(
4320     MachineInstr &I, MachineRegisterInfo &MRI) const {
4321   assert(I.getOpcode() == TargetOpcode::G_FCONSTANT &&
4322          "Expected a G_FCONSTANT!");
4323   MachineOperand &ImmOp = I.getOperand(1);
4324   unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();
4325 
4326   // Only handle 32 and 64 bit defs for now.
4327   if (DefSize != 32 && DefSize != 64)
4328     return nullptr;
4329 
4330   // Don't handle null values using FMOV.
4331   if (ImmOp.getFPImm()->isNullValue())
4332     return nullptr;
4333 
4334   // Get the immediate representation for the FMOV.
4335   const APFloat &ImmValAPF = ImmOp.getFPImm()->getValueAPF();
4336   int Imm = DefSize == 32 ? AArch64_AM::getFP32Imm(ImmValAPF)
4337                           : AArch64_AM::getFP64Imm(ImmValAPF);
4338 
4339   // If this is -1, it means the immediate can't be represented as the requested
4340   // floating point value. Bail.
4341   if (Imm == -1)
4342     return nullptr;
4343 
4344   // Update MI to represent the new FMOV instruction, constrain it, and return.
4345   ImmOp.ChangeToImmediate(Imm);
4346   unsigned MovOpc = DefSize == 32 ? AArch64::FMOVSi : AArch64::FMOVDi;
4347   I.setDesc(TII.get(MovOpc));
4348   constrainSelectedInstRegOperands(I, TII, TRI, RBI);
4349   return &I;
4350 }
4351 
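/// Materialize the boolean result of an integer compare (a CSET) into
/// \p DefReg, using a CSINC from WZR with the inverted condition code.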
MachineInstr *
AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
                                            MachineIRBuilder &MIRBuilder) const {
  // CSINC increments the result when the predicate is false. Invert it.
  const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
      CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
  auto I = MIRBuilder
               .buildInstr(AArch64::CSINCWr, {DefReg},
                           {Register(AArch64::WZR), Register(AArch64::WZR)})
               .addImm(InvCC);
4362   constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
4363   return &*I;
4364 }
4365 
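/// Emit the flag-setting operation for an overflow op (G_SADDO etc.) and
/// return it together with the condition code that signals overflow.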
4366 std::pair<MachineInstr *, AArch64CC::CondCode>
4367 AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4368                                            MachineOperand &LHS,
4369                                            MachineOperand &RHS,
4370                                            MachineIRBuilder &MIRBuilder) const {
4371   switch (Opcode) {
4372   default:
4373     llvm_unreachable("Unexpected opcode!");
4374   case TargetOpcode::G_SADDO:
4375     return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4376   case TargetOpcode::G_UADDO:
4377     return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4378   case TargetOpcode::G_SSUBO:
4379     return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4380   case TargetOpcode::G_USUBO:
4381     return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4382   }
4383 }
4384 
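/// Try to fold a G_ICMP/G_FCMP feeding a G_SELECT directly into a
/// compare + conditional-select sequence, so the 1-bit condition value never
/// has to be materialized.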
4385 bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
4386   MachineIRBuilder MIB(I);
4387   MachineRegisterInfo &MRI = *MIB.getMRI();
4388   // We want to recognize this pattern:
4389   //
4390   // $z = G_FCMP pred, $x, $y
4391   // ...
4392   // $w = G_SELECT $z, $a, $b
4393   //
4394   // Where the value of $z is *only* ever used by the G_SELECT (possibly with
4395   // some copies/truncs in between.)
4396   //
4397   // If we see this, then we can emit something like this:
4398   //
4399   // fcmp $x, $y
4400   // fcsel $w, $a, $b, pred
4401   //
4402   // Rather than emitting both of the rather long sequences in the standard
4403   // G_FCMP/G_SELECT select methods.
4404 
4405   // First, check if the condition is defined by a compare.
4406   MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
4407   while (CondDef) {
4408     // We can only fold if all of the defs have one use.
4409     Register CondDefReg = CondDef->getOperand(0).getReg();
4410     if (!MRI.hasOneNonDBGUse(CondDefReg)) {
4411       // Unless it's another select.
4412       for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
4413         if (CondDef == &UI)
4414           continue;
4415         if (UI.getOpcode() != TargetOpcode::G_SELECT)
4416           return false;
4417       }
4418     }
4419 
    // We can skip over a COPY or G_TRUNC since the condition is 1-bit;
    // truncating/extending it has no impact on the value.
4422     unsigned Opc = CondDef->getOpcode();
4423     if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
4424       break;
4425 
4426     // Can't see past copies from physregs.
4427     if (Opc == TargetOpcode::COPY &&
4428         Register::isPhysicalRegister(CondDef->getOperand(1).getReg()))
4429       return false;
4430 
4431     CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
4432   }
4433 
4434   // Is the condition defined by a compare?
4435   if (!CondDef)
4436     return false;
4437 
4438   unsigned CondOpc = CondDef->getOpcode();
4439   if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
4440     return false;
4441 
4442   AArch64CC::CondCode CondCode;
4443   if (CondOpc == TargetOpcode::G_ICMP) {
4444     auto Pred =
4445         static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
4446     CondCode = changeICMPPredToAArch64CC(Pred);
4447     emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
4448                        CondDef->getOperand(1), MIB);
4449   } else {
4450     // Get the condition code for the select.
4451     auto Pred =
4452         static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
4453     AArch64CC::CondCode CondCode2;
4454     changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
4455 
4456     // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
4457     // instructions to emit the comparison.
4458     // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
4459     // unnecessary.
4460     if (CondCode2 != AArch64CC::AL)
4461       return false;
4462 
4463     if (!emitFPCompare(CondDef->getOperand(2).getReg(),
4464                        CondDef->getOperand(3).getReg(), MIB)) {
4465       LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
4466       return false;
4467     }
4468   }
4469 
4470   // Emit the select.
4471   emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
4472              I.getOperand(3).getReg(), CondCode, MIB);
4473   I.eraseFromParent();
4474   return true;
4475 }
4476 
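/// Try to fold an integer compare into a flag-setting CMN or TST instead of a
/// SUBS. Returns the emitted instruction on success, or nullptr if no fold
/// applies.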
4477 MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
4478     MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4479     MachineIRBuilder &MIRBuilder) const {
4480   assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
4481          "Unexpected MachineOperand");
4482   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4483   // We want to find this sort of thing:
4484   // x = G_SUB 0, y
4485   // G_ICMP z, x
4486   //
4487   // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
4488   // e.g:
4489   //
4490   // cmn z, y
4491 
4492   // Helper lambda to detect the subtract followed by the compare.
4493   // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0.
  auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) {
    // We only want to match against G_SUBs.
    if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB)
      return false;

    // Need to make sure NZCV is the same at the end of the transformation.
    if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
      return false;

4506     // Make sure that we're getting
4507     // x = G_SUB 0, y
4508     auto ValAndVReg =
4509         getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI);
4510     if (!ValAndVReg || ValAndVReg->Value != 0)
4511       return false;
4512 
4513     // This can safely be represented as a CMN.
4514     return true;
4515   };
4516 
4517   // Check if the RHS or LHS of the G_ICMP is defined by a SUB
4518   MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
4519   MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
4520   CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
4521   const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(P);
4522 
4523   // Given this:
4524   //
4525   // x = G_SUB 0, y
4526   // G_ICMP x, z
4527   //
4528   // Produce this:
4529   //
4530   // cmn y, z
4531   if (IsCMN(LHSDef, CC))
4532     return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
4533 
4534   // Same idea here, but with the RHS of the compare instead:
4535   //
4536   // Given this:
4537   //
4538   // x = G_SUB 0, y
4539   // G_ICMP z, x
4540   //
4541   // Produce this:
4542   //
4543   // cmn z, y
4544   if (IsCMN(RHSDef, CC))
4545     return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
4546 
4547   // Given this:
4548   //
4549   // z = G_AND x, y
4550   // G_ICMP z, 0
4551   //
4552   // Produce this if the compare is signed:
4553   //
4554   // tst x, y
4555   if (!CmpInst::isUnsigned(P) && LHSDef &&
4556       LHSDef->getOpcode() == TargetOpcode::G_AND) {
4557     // Make sure that the RHS is 0.
4558     auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
4559     if (!ValAndVReg || ValAndVReg->Value != 0)
4560       return nullptr;
4561 
4562     return emitTST(LHSDef->getOperand(1),
4563                    LHSDef->getOperand(2), MIRBuilder);
4564   }
4565 
4566   return nullptr;
4567 }
4568 
4569 bool AArch64InstructionSelector::selectShuffleVector(
4570     MachineInstr &I, MachineRegisterInfo &MRI) const {
4571   const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
4572   Register Src1Reg = I.getOperand(1).getReg();
4573   const LLT Src1Ty = MRI.getType(Src1Reg);
4574   Register Src2Reg = I.getOperand(2).getReg();
4575   const LLT Src2Ty = MRI.getType(Src2Reg);
4576   ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
4577 
4578   MachineBasicBlock &MBB = *I.getParent();
4579   MachineFunction &MF = *MBB.getParent();
4580   LLVMContext &Ctx = MF.getFunction().getContext();
4581 
  // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars if
  // the shuffle originated from a <1 x T> type. Those should have been
  // lowered into G_BUILD_VECTOR earlier.
4585   if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
4586     LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
4587     return false;
4588   }
4589 
4590   unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
4591 
4592   SmallVector<Constant *, 64> CstIdxs;
4593   for (int Val : Mask) {
    // For now, we'll just assume any undef indexes are 0. This should be
    // optimized in the future, e.g. to select DUP etc.
4596     Val = Val < 0 ? 0 : Val;
4597     for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
4598       unsigned Offset = Byte + Val * BytesPerElt;
4599       CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
4600     }
4601   }
4602 
4603   MachineIRBuilder MIRBuilder(I);
4604 
4605   // Use a constant pool to load the index vector for TBL.
4606   Constant *CPVal = ConstantVector::get(CstIdxs);
4607   MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
4608   if (!IndexLoad) {
4609     LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
4610     return false;
4611   }
4612 
4613   if (DstTy.getSizeInBits() != 128) {
4614     assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
4615     // This case can be done with TBL1.
4616     MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
4617     if (!Concat) {
4618       LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
4619       return false;
4620     }
4621 
4622     // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
4623     IndexLoad =
4624         emitScalarToVector(64, &AArch64::FPR128RegClass,
4625                            IndexLoad->getOperand(0).getReg(), MIRBuilder);
4626 
4627     auto TBL1 = MIRBuilder.buildInstr(
4628         AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
4629         {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
4630     constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
4631 
4632     auto Copy =
4633         MIRBuilder
4634             .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
4635             .addReg(TBL1.getReg(0), 0, AArch64::dsub);
4636     RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
4637     I.eraseFromParent();
4638     return true;
4639   }
4640 
4641   // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
4642   // Q registers for regalloc.
4643   auto RegSeq = MIRBuilder
4644                     .buildInstr(TargetOpcode::REG_SEQUENCE,
4645                                 {&AArch64::QQRegClass}, {Src1Reg})
4646                     .addImm(AArch64::qsub0)
4647                     .addUse(Src2Reg)
4648                     .addImm(AArch64::qsub1);
4649 
4650   auto TBL2 = MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
4651                                     {RegSeq, IndexLoad->getOperand(0)});
4652   constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
4653   constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
4654   I.eraseFromParent();
4655   return true;
4656 }
4657 
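/// Emit an INSvi* lane insert of \p EltReg into lane \p LaneIdx of \p SrcReg,
/// producing an FPR128 result. FPR-bank elements are first widened with a
/// scalar-to-vector so the element can be taken from lane 0.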
4658 MachineInstr *AArch64InstructionSelector::emitLaneInsert(
4659     Optional<Register> DstReg, Register SrcReg, Register EltReg,
4660     unsigned LaneIdx, const RegisterBank &RB,
4661     MachineIRBuilder &MIRBuilder) const {
4662   MachineInstr *InsElt = nullptr;
4663   const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
4664   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4665 
4666   // Create a register to define with the insert if one wasn't passed in.
4667   if (!DstReg)
4668     DstReg = MRI.createVirtualRegister(DstRC);
4669 
4670   unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
4671   unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
4672 
4673   if (RB.getID() == AArch64::FPRRegBankID) {
4674     auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
4675     InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
4676                  .addImm(LaneIdx)
4677                  .addUse(InsSub->getOperand(0).getReg())
4678                  .addImm(0);
4679   } else {
4680     InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
4681                  .addImm(LaneIdx)
4682                  .addUse(EltReg);
4683   }
4684 
4685   constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4686   return InsElt;
4687 }
4688 
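/// Select a G_INSERT_VECTOR_ELT with a constant lane index by widening the
/// source vector to 128 bits if necessary, performing the lane insert, and
/// copying back down to the original size.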
4689 bool AArch64InstructionSelector::selectInsertElt(
4690     MachineInstr &I, MachineRegisterInfo &MRI) const {
4691   assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
4692 
4693   // Get information on the destination.
4694   Register DstReg = I.getOperand(0).getReg();
4695   const LLT DstTy = MRI.getType(DstReg);
4696   unsigned VecSize = DstTy.getSizeInBits();
4697 
4698   // Get information on the element we want to insert into the destination.
4699   Register EltReg = I.getOperand(2).getReg();
4700   const LLT EltTy = MRI.getType(EltReg);
4701   unsigned EltSize = EltTy.getSizeInBits();
4702   if (EltSize < 16 || EltSize > 64)
4703     return false; // Don't support all element types yet.
4704 
4705   // Find the definition of the index. Bail out if it's not defined by a
4706   // G_CONSTANT.
4707   Register IdxReg = I.getOperand(3).getReg();
4708   auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
4709   if (!VRegAndVal)
4710     return false;
4711   unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4712 
4713   // Perform the lane insert.
4714   Register SrcReg = I.getOperand(1).getReg();
4715   const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
4716   MachineIRBuilder MIRBuilder(I);
4717 
4718   if (VecSize < 128) {
4719     // If the vector we're inserting into is smaller than 128 bits, widen it
4720     // to 128 to do the insert.
4721     MachineInstr *ScalarToVec = emitScalarToVector(
4722         VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
4723     if (!ScalarToVec)
4724       return false;
4725     SrcReg = ScalarToVec->getOperand(0).getReg();
4726   }
4727 
4728   // Create an insert into a new FPR128 register.
4729   // Note that if our vector is already 128 bits, we end up emitting an extra
4730   // register.
4731   MachineInstr *InsMI =
4732       emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);
4733 
4734   if (VecSize < 128) {
4735     // If we had to widen to perform the insert, then we have to demote back to
4736     // the original size to get the result we want.
4737     Register DemoteVec = InsMI->getOperand(0).getReg();
4738     const TargetRegisterClass *RC =
4739         getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
4740     if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
4741       LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
4742       return false;
4743     }
4744     unsigned SubReg = 0;
4745     if (!getSubRegForClass(RC, TRI, SubReg))
4746       return false;
4747     if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
4748       LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
4749                         << "\n");
4750       return false;
4751     }
4752     MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
4753         .addReg(DemoteVec, 0, SubReg);
4754     RBI.constrainGenericRegister(DstReg, *RC, MRI);
4755   } else {
4756     // No widening needed.
4757     InsMI->getOperand(0).setReg(DstReg);
4758     constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
4759   }
4760 
4761   I.eraseFromParent();
4762   return true;
4763 }
4764 
4765 bool AArch64InstructionSelector::tryOptConstantBuildVec(
4766     MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) const {
4767   assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4768   unsigned DstSize = DstTy.getSizeInBits();
4769   assert(DstSize <= 128 && "Unexpected build_vec type!");
4770   if (DstSize < 32)
4771     return false;
4772   // Check if we're building a constant vector, in which case we want to
4773   // generate a constant pool load instead of a vector insert sequence.
4774   SmallVector<Constant *, 16> Csts;
4775   for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
4776     // Try to find G_CONSTANT or G_FCONSTANT
4777     auto *OpMI =
4778         getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
4779     if (OpMI)
4780       Csts.emplace_back(
4781           const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
4782     else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
4783                                   I.getOperand(Idx).getReg(), MRI)))
4784       Csts.emplace_back(
4785           const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
4786     else
4787       return false;
4788   }
4789   Constant *CV = ConstantVector::get(Csts);
4790   MachineIRBuilder MIB(I);
4791   if (CV->isNullValue()) {
4792     // Until the importer can support immAllZerosV in pattern leaf nodes,
4793     // select a zero move manually here.
4794     Register DstReg = I.getOperand(0).getReg();
4795     if (DstSize == 128) {
4796       auto Mov = MIB.buildInstr(AArch64::MOVIv2d_ns, {DstReg}, {}).addImm(0);
4797       I.eraseFromParent();
4798       return constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
4799     } else if (DstSize == 64) {
4800       auto Mov =
4801           MIB.buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
4802               .addImm(0);
4803       MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
4804           .addReg(Mov.getReg(0), 0, AArch64::dsub);
4805       I.eraseFromParent();
4806       return RBI.constrainGenericRegister(DstReg, AArch64::FPR64RegClass, MRI);
4807     }
4808   }
4809   auto *CPLoad = emitLoadFromConstantPool(CV, MIB);
4810   if (!CPLoad) {
4811     LLVM_DEBUG(dbgs() << "Could not generate cp load for build_vector");
4812     return false;
4813   }
4814   MIB.buildCopy(I.getOperand(0), CPLoad->getOperand(0));
4815   RBI.constrainGenericRegister(I.getOperand(0).getReg(),
4816                                *MRI.getRegClass(CPLoad->getOperand(0).getReg()),
4817                                MRI);
4818   I.eraseFromParent();
4819   return true;
4820 }
4821 
4822 bool AArch64InstructionSelector::selectBuildVector(
4823     MachineInstr &I, MachineRegisterInfo &MRI) const {
4824   assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4825   // Until we port more of the optimized selections, for now just use a vector
4826   // insert sequence.
4827   const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
4828   const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
4829   unsigned EltSize = EltTy.getSizeInBits();
4830 
4831   if (tryOptConstantBuildVec(I, DstTy, MRI))
4832     return true;
4833   if (EltSize < 16 || EltSize > 64)
4834     return false; // Don't support all element types yet.
4835   const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
4836   MachineIRBuilder MIRBuilder(I);
4837 
4838   const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
4839   MachineInstr *ScalarToVec =
4840       emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
4841                          I.getOperand(1).getReg(), MIRBuilder);
4842   if (!ScalarToVec)
4843     return false;
4844 
4845   Register DstVec = ScalarToVec->getOperand(0).getReg();
4846   unsigned DstSize = DstTy.getSizeInBits();
4847 
4848   // Keep track of the last MI we inserted. Later on, we might be able to save
4849   // a copy using it.
4850   MachineInstr *PrevMI = nullptr;
4851   for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
4852     // Note that if we don't do a subregister copy, we can end up making an
4853     // extra register.
4854     PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
4855                               MIRBuilder);
4856     DstVec = PrevMI->getOperand(0).getReg();
4857   }
4858 
4859   // If DstTy's size in bits is less than 128, then emit a subregister copy
4860   // from DstVec to the last register we've defined.
4861   if (DstSize < 128) {
4862     // Force this to be FPR using the destination vector.
4863     const TargetRegisterClass *RC =
4864         getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
4865     if (!RC)
4866       return false;
4867     if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
4868       LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
4869       return false;
4870     }
4871 
4872     unsigned SubReg = 0;
4873     if (!getSubRegForClass(RC, TRI, SubReg))
4874       return false;
4875     if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
4876       LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
4877                         << "\n");
4878       return false;
4879     }
4880 
4881     Register Reg = MRI.createVirtualRegister(RC);
4882     Register DstReg = I.getOperand(0).getReg();
4883 
4884     MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
4885         .addReg(DstVec, 0, SubReg);
4886     MachineOperand &RegOp = I.getOperand(1);
4887     RegOp.setReg(Reg);
4888     RBI.constrainGenericRegister(DstReg, *RC, MRI);
4889   } else {
4890     // We don't need a subregister copy. Save a copy by re-using the
4891     // destination register on the final insert.
4892     assert(PrevMI && "PrevMI was null?");
4893     PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
4894     constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
4895   }
4896 
4897   I.eraseFromParent();
4898   return true;
4899 }
4900 
/// Helper function to find an intrinsic ID on a MachineInstr. Returns the ID
/// if it exists, and 0 otherwise.
4903 static unsigned findIntrinsicID(MachineInstr &I) {
4904   auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
4905     return Op.isIntrinsicID();
4906   });
4907   if (IntrinOp == I.operands_end())
4908     return 0;
4909   return IntrinOp->getIntrinsicID();
4910 }
4911 
4912 bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
4913     MachineInstr &I, MachineRegisterInfo &MRI) const {
4914   // Find the intrinsic ID.
4915   unsigned IntrinID = findIntrinsicID(I);
4916   if (!IntrinID)
4917     return false;
4918   MachineIRBuilder MIRBuilder(I);
4919 
4920   // Select the instruction.
4921   switch (IntrinID) {
4922   default:
4923     return false;
4924   case Intrinsic::trap:
4925     MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
4926     break;
4927   case Intrinsic::debugtrap:
4928     MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
4929     break;
4930   case Intrinsic::ubsantrap:
4931     MIRBuilder.buildInstr(AArch64::BRK, {}, {})
4932         .addImm(I.getOperand(1).getImm() | ('U' << 8));
4933     break;
4934   }
4935 
4936   I.eraseFromParent();
4937   return true;
4938 }
4939 
4940 bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
4941                                                  MachineRegisterInfo &MRI) {
4942   unsigned IntrinID = findIntrinsicID(I);
4943   if (!IntrinID)
4944     return false;
4945   MachineIRBuilder MIRBuilder(I);
4946 
4947   switch (IntrinID) {
4948   default:
4949     break;
4950   case Intrinsic::aarch64_crypto_sha1h: {
4951     Register DstReg = I.getOperand(0).getReg();
4952     Register SrcReg = I.getOperand(2).getReg();
4953 
4954     // FIXME: Should this be an assert?
4955     if (MRI.getType(DstReg).getSizeInBits() != 32 ||
4956         MRI.getType(SrcReg).getSizeInBits() != 32)
4957       return false;
4958 
4959     // The operation has to happen on FPRs. Set up some new FPR registers for
4960     // the source and destination if they are on GPRs.
4961     if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4962       SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
4963       MIRBuilder.buildCopy({SrcReg}, {I.getOperand(2)});
4964 
4965       // Make sure the copy ends up getting constrained properly.
4966       RBI.constrainGenericRegister(I.getOperand(2).getReg(),
4967                                    AArch64::GPR32RegClass, MRI);
4968     }
4969 
4970     if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
4971       DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
4972 
4973     // Actually insert the instruction.
4974     auto SHA1Inst = MIRBuilder.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
4975     constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
4976 
4977     // Did we create a new register for the destination?
4978     if (DstReg != I.getOperand(0).getReg()) {
4979       // Yep. Copy the result of the instruction back into the original
4980       // destination.
4981       MIRBuilder.buildCopy({I.getOperand(0)}, {DstReg});
4982       RBI.constrainGenericRegister(I.getOperand(0).getReg(),
4983                                    AArch64::GPR32RegClass, MRI);
4984     }
4985 
4986     I.eraseFromParent();
4987     return true;
4988   }
4989   case Intrinsic::frameaddress:
4990   case Intrinsic::returnaddress: {
4991     MachineFunction &MF = *I.getParent()->getParent();
4992     MachineFrameInfo &MFI = MF.getFrameInfo();
4993 
4994     unsigned Depth = I.getOperand(2).getImm();
4995     Register DstReg = I.getOperand(0).getReg();
4996     RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
4997 
4998     if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
4999       if (!MFReturnAddr) {
5000         // Insert the copy from LR/X30 into the entry block, before it can be
5001         // clobbered by anything.
5002         MFI.setReturnAddressIsTaken(true);
5003         MFReturnAddr = getFunctionLiveInPhysReg(MF, TII, AArch64::LR,
5004                                                 AArch64::GPR64RegClass);
5005       }
5006 
5007       if (STI.hasPAuth()) {
5008         MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
5009       } else {
5010         MIRBuilder.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
5011         MIRBuilder.buildInstr(AArch64::XPACLRI);
5012         MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
5013       }
5014 
5015       I.eraseFromParent();
5016       return true;
5017     }
5018 
5019     MFI.setFrameAddressIsTaken(true);
5020     Register FrameAddr(AArch64::FP);
5021     while (Depth--) {
5022       Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
5023       auto Ldr =
5024           MIRBuilder.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr})
5025               .addImm(0);
5026       constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);
5027       FrameAddr = NextFrame;
5028     }
5029 
5030     if (IntrinID == Intrinsic::frameaddress)
5031       MIRBuilder.buildCopy({DstReg}, {FrameAddr});
5032     else {
5033       MFI.setReturnAddressIsTaken(true);
5034 
5035       if (STI.hasPAuth()) {
5036         Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
5037         MIRBuilder.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
5038         MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
5039       } else {
        MIRBuilder
            .buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr})
            .addImm(1);
5041         MIRBuilder.buildInstr(AArch64::XPACLRI);
5042         MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
5043       }
5044     }
5045 
5046     I.eraseFromParent();
5047     return true;
5048   }
5049   }
5050   return false;
5051 }
5052 
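/// Immediate renderers for shift amounts used by imported tablegen patterns:
/// the "A" variants render (BitWidth - Imm) masked to the bit width, and the
/// "B" variants render (BitWidth - 1 - Imm). They return None if the root
/// operand is not an immediate in [0, BitWidth - 1].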
5053 InstructionSelector::ComplexRendererFns
5054 AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
5055   auto MaybeImmed = getImmedFromMO(Root);
5056   if (MaybeImmed == None || *MaybeImmed > 31)
5057     return None;
5058   uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
5059   return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5060 }
5061 
5062 InstructionSelector::ComplexRendererFns
5063 AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
5064   auto MaybeImmed = getImmedFromMO(Root);
5065   if (MaybeImmed == None || *MaybeImmed > 31)
5066     return None;
5067   uint64_t Enc = 31 - *MaybeImmed;
5068   return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5069 }
5070 
5071 InstructionSelector::ComplexRendererFns
5072 AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
5073   auto MaybeImmed = getImmedFromMO(Root);
5074   if (MaybeImmed == None || *MaybeImmed > 63)
5075     return None;
5076   uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
5077   return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5078 }
5079 
5080 InstructionSelector::ComplexRendererFns
5081 AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
5082   auto MaybeImmed = getImmedFromMO(Root);
5083   if (MaybeImmed == None || *MaybeImmed > 63)
5084     return None;
5085   uint64_t Enc = 63 - *MaybeImmed;
5086   return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5087 }
5088 
5089 /// Helper to select an immediate value that can be represented as a 12-bit
5090 /// value shifted left by either 0 or 12. If it is possible to do so, return
5091 /// the immediate and shift value. If not, return None.
5092 ///
5093 /// Used by selectArithImmed and selectNegArithImmed.
5094 InstructionSelector::ComplexRendererFns
5095 AArch64InstructionSelector::select12BitValueWithLeftShift(
5096     uint64_t Immed) const {
5097   unsigned ShiftAmt;
5098   if (Immed >> 12 == 0) {
5099     ShiftAmt = 0;
5100   } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
5101     ShiftAmt = 12;
5102     Immed = Immed >> 12;
5103   } else
5104     return None;
5105 
5106   unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
5107   return {{
5108       [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
5109       [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
5110   }};
5111 }
5112 
5113 /// SelectArithImmed - Select an immediate value that can be represented as
5114 /// a 12-bit value shifted left by either 0 or 12.  If so, return true with
5115 /// Val set to the 12-bit value and Shift set to the shifter operand.
5116 InstructionSelector::ComplexRendererFns
5117 AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
5118   // This function is called from the addsub_shifted_imm ComplexPattern,
5119   // which lists [imm] as the list of opcode it's interested in, however
5120   // we still need to check whether the operand is actually an immediate
5121   // here because the ComplexPattern opcode list is only used in
5122   // root-level opcode matching.
5123   auto MaybeImmed = getImmedFromMO(Root);
5124   if (MaybeImmed == None)
5125     return None;
5126   return select12BitValueWithLeftShift(*MaybeImmed);
5127 }
5128 
5129 /// SelectNegArithImmed - As above, but negates the value before trying to
5130 /// select it.
5131 InstructionSelector::ComplexRendererFns
5132 AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
5133   // We need a register here, because we need to know if we have a 64 or 32
5134   // bit immediate.
5135   if (!Root.isReg())
5136     return None;
5137   auto MaybeImmed = getImmedFromMO(Root);
5138   if (MaybeImmed == None)
5139     return None;
5140   uint64_t Immed = *MaybeImmed;
5141 
5142   // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
5143   // have the opposite effect on the C flag, so this pattern mustn't match under
5144   // those circumstances.
5145   if (Immed == 0)
5146     return None;
5147 
5148   // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
5149   // the root.
5150   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5151   if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
5152     Immed = ~((uint32_t)Immed) + 1;
5153   else
5154     Immed = ~Immed + 1ULL;
5155 
5156   if (Immed & 0xFFFFFFFFFF000000ULL)
5157     return None;
5158 
5159   Immed &= 0xFFFFFFULL;
5160   return select12BitValueWithLeftShift(Immed);
5161 }
5162 
5163 /// Return true if it is worth folding MI into an extended register. That is,
5164 /// if it's safe to pull it into the addressing mode of a load or store as a
5165 /// shift.
5166 bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
5167     MachineInstr &MI, const MachineRegisterInfo &MRI) const {
5168   // Always fold if there is one use, or if we're optimizing for size.
5169   Register DefReg = MI.getOperand(0).getReg();
5170   if (MRI.hasOneNonDBGUse(DefReg) ||
5171       MI.getParent()->getParent()->getFunction().hasMinSize())
5172     return true;
5173 
5174   // It's better to avoid folding and recomputing shifts when we don't have a
5175   // fastpath.
5176   if (!STI.hasLSLFast())
5177     return false;
5178 
5179   // We have a fastpath, so folding a shift in and potentially computing it
5180   // many times may be beneficial. Check if this is only used in memory ops.
5181   // If it is, then we should fold.
5182   return all_of(MRI.use_nodbg_instructions(DefReg),
5183                 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
5184 }
5185 
5186 static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) {
5187   switch (Type) {
5188   case AArch64_AM::SXTB:
5189   case AArch64_AM::SXTH:
5190   case AArch64_AM::SXTW:
5191     return true;
5192   default:
5193     return false;
5194   }
5195 }
5196 
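/// Match an offset of the form (G_SHL/G_MUL off_reg, <log2 of the access
/// size>), optionally behind a G_ZEXT when \p WantsExt is true, so it can be
/// folded into a register-offset addressing mode. Returns renderers for the
/// base register, offset register, and extend/shift immediates, or None if no
/// fold is possible.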
5197 InstructionSelector::ComplexRendererFns
5198 AArch64InstructionSelector::selectExtendedSHL(
5199     MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
5200     unsigned SizeInBytes, bool WantsExt) const {
5201   assert(Base.isReg() && "Expected base to be a register operand");
5202   assert(Offset.isReg() && "Expected offset to be a register operand");
5203 
5204   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5205   MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
5206   if (!OffsetInst)
5207     return None;
5208 
5209   unsigned OffsetOpc = OffsetInst->getOpcode();
5210   bool LookedThroughZExt = false;
5211   if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
5212     // Try to look through a ZEXT.
5213     if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
5214       return None;
5215 
5216     OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());
5217     OffsetOpc = OffsetInst->getOpcode();
5218     LookedThroughZExt = true;
5219 
5220     if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
5221       return None;
5222   }
5223   // Make sure that the memory op is a valid size.
5224   int64_t LegalShiftVal = Log2_32(SizeInBytes);
5225   if (LegalShiftVal == 0)
5226     return None;
5227   if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
5228     return None;
5229 
5230   // Now, try to find the specific G_CONSTANT. Start by assuming that the
5231   // register we will offset is the LHS, and the register containing the
5232   // constant is the RHS.
5233   Register OffsetReg = OffsetInst->getOperand(1).getReg();
5234   Register ConstantReg = OffsetInst->getOperand(2).getReg();
5235   auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
5236   if (!ValAndVReg) {
5237     // We didn't get a constant on the RHS. If the opcode is a shift, then
5238     // we're done.
5239     if (OffsetOpc == TargetOpcode::G_SHL)
5240       return None;
5241 
5242     // If we have a G_MUL, we can use either register. Try looking at the RHS.
5243     std::swap(OffsetReg, ConstantReg);
5244     ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
5245     if (!ValAndVReg)
5246       return None;
5247   }
5248 
5249   // The value must fit into 3 bits, and must be positive. Make sure that is
5250   // true.
5251   int64_t ImmVal = ValAndVReg->Value.getSExtValue();
5252 
5253   // Since we're going to pull this into a shift, the constant value must be
5254   // a power of 2. If we got a multiply, then we need to check this.
5255   if (OffsetOpc == TargetOpcode::G_MUL) {
5256     if (!isPowerOf2_32(ImmVal))
5257       return None;
5258 
5259     // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
5260     ImmVal = Log2_32(ImmVal);
5261   }
5262 
5263   if ((ImmVal & 0x7) != ImmVal)
5264     return None;
5265 
5266   // We are only allowed to shift by LegalShiftVal. This shift value is built
5267   // into the instruction, so we can't just use whatever we want.
5268   if (ImmVal != LegalShiftVal)
5269     return None;
5270 
5271   unsigned SignExtend = 0;
5272   if (WantsExt) {
5273     // Check if the offset is defined by an extend, unless we looked through a
5274     // G_ZEXT earlier.
5275     if (!LookedThroughZExt) {
5276       MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
5277       auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
5278       if (Ext == AArch64_AM::InvalidShiftExtend)
5279         return None;
5280 
5281       SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
5282       // We only support SXTW for signed extension here.
5283       if (SignExtend && Ext != AArch64_AM::SXTW)
5284         return None;
5285       OffsetReg = ExtInst->getOperand(1).getReg();
5286     }
5287 
5288     // Need a 32-bit wide register here.
5289     MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
5290     OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
5291   }
5292 
5293   // We can use the LHS of the GEP as the base, and the LHS of the shift as an
5294   // offset. Signify that we are shifting by setting the shift flag to 1.
5295   return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
5296            [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
5297            [=](MachineInstrBuilder &MIB) {
5298              // Need to add both immediates here to make sure that they are both
5299              // added to the instruction.
5300              MIB.addImm(SignExtend);
5301              MIB.addImm(1);
5302            }}};
5303 }
5304 
5305 /// This is used for computing addresses like this:
5306 ///
5307 /// ldr x1, [x2, x3, lsl #3]
5308 ///
5309 /// Where x2 is the base register, and x3 is an offset register. The shift-left
5310 /// is a constant value specific to this load instruction. That is, we'll never
5311 /// see anything other than a 3 here (which corresponds to the size of the
5312 /// element being loaded.)
5313 InstructionSelector::ComplexRendererFns
5314 AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
5315     MachineOperand &Root, unsigned SizeInBytes) const {
5316   if (!Root.isReg())
5317     return None;
5318   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5319 
5320   // We want to find something like this:
5321   //
5322   // val = G_CONSTANT LegalShiftVal
5323   // shift = G_SHL off_reg val
5324   // ptr = G_PTR_ADD base_reg shift
5325   // x = G_LOAD ptr
5326   //
5327   // And fold it into this addressing mode:
5328   //
5329   // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
5330 
5331   // Check if we can find the G_PTR_ADD.
5332   MachineInstr *PtrAdd =
5333       getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
5334   if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
5335     return None;
5336 
5337   // Now, try to match an opcode which will match our specific offset.
5338   // We want a G_SHL or a G_MUL.
5339   MachineInstr *OffsetInst =
5340       getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
5341   return selectExtendedSHL(Root, PtrAdd->getOperand(1),
5342                            OffsetInst->getOperand(0), SizeInBytes,
5343                            /*WantsExt=*/false);
5344 }
5345 
5346 /// This is used for computing addresses like this:
5347 ///
5348 /// ldr x1, [x2, x3]
5349 ///
5350 /// Where x2 is the base register, and x3 is an offset register.
5351 ///
/// When it is possible (or profitable) to fold a G_PTR_ADD into the address
/// calculation, this will do so. Otherwise, it will return None.
5354 InstructionSelector::ComplexRendererFns
5355 AArch64InstructionSelector::selectAddrModeRegisterOffset(
5356     MachineOperand &Root) const {
5357   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5358 
5359   // We need a GEP.
5360   MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
5361   if (!Gep || Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
5362     return None;
5363 
5364   // If this is used more than once, let's not bother folding.
5365   // TODO: Check if they are memory ops. If they are, then we can still fold
5366   // without having to recompute anything.
5367   if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
5368     return None;
5369 
5370   // Base is the GEP's LHS, offset is its RHS.
5371   return {{[=](MachineInstrBuilder &MIB) {
5372              MIB.addUse(Gep->getOperand(1).getReg());
5373            },
5374            [=](MachineInstrBuilder &MIB) {
5375              MIB.addUse(Gep->getOperand(2).getReg());
5376            },
5377            [=](MachineInstrBuilder &MIB) {
5378              // Need to add both immediates here to make sure that they are both
5379              // added to the instruction.
5380              MIB.addImm(0);
5381              MIB.addImm(0);
5382            }}};
5383 }
5384 
5385 /// This is intended to be equivalent to selectAddrModeXRO in
5386 /// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
5387 InstructionSelector::ComplexRendererFns
5388 AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
5389                                               unsigned SizeInBytes) const {
5390   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5391   if (!Root.isReg())
5392     return None;
5393   MachineInstr *PtrAdd =
5394       getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
5395   if (!PtrAdd)
5396     return None;
5397 
  // Check for immediates which cannot be encoded in the [base + imm]
  // addressing mode, and cannot be encoded in an add/sub. If this happens,
  // we'll end up with code like:
  //
  // mov x0, wide
  // add x1, base, x0
  // ldr x2, [x1, x0]
5405   //
5406   // In this situation, we can use the [base, xreg] addressing mode to save an
5407   // add/sub:
5408   //
5409   // mov x0, wide
5410   // ldr x2, [base, x0]
5411   auto ValAndVReg =
5412       getConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
5413   if (ValAndVReg) {
5414     unsigned Scale = Log2_32(SizeInBytes);
5415     int64_t ImmOff = ValAndVReg->Value.getSExtValue();
5416 
    // Skip immediates that can be selected in the load/store addressing
    // mode.
5419     if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
5420         ImmOff < (0x1000 << Scale))
5421       return None;
5422 
5423     // Helper lambda to decide whether or not it is preferable to emit an add.
5424     auto isPreferredADD = [](int64_t ImmOff) {
5425       // Constants in [0x0, 0xfff] can be encoded in an add.
5426       if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
5427         return true;
5428 
5429       // Can it be encoded in an add lsl #12?
5430       if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
5431         return false;
5432 
5433       // It can be encoded in an add lsl #12, but we may not want to. If it is
5434       // possible to select this as a single movz, then prefer that. A single
5435       // movz is faster than an add with a shift.
5436       return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
5437              (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
5438     };
5439 
5440     // If the immediate can be encoded in a single add/sub, then bail out.
5441     if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
5442       return None;
5443   }
5444 
5445   // Try to fold shifts into the addressing mode.
5446   auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
5447   if (AddrModeFns)
5448     return AddrModeFns;
5449 
5450   // If that doesn't work, see if it's possible to fold in registers from
5451   // a GEP.
5452   return selectAddrModeRegisterOffset(Root);
5453 }
5454 
5455 /// This is used for computing addresses like this:
5456 ///
5457 /// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
5458 ///
5459 /// Where we have a 64-bit base register, a 32-bit offset register, and an
5460 /// extend (which may or may not be signed).
5461 InstructionSelector::ComplexRendererFns
5462 AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
5463                                               unsigned SizeInBytes) const {
5464   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5465 
5466   MachineInstr *PtrAdd =
5467       getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
5468   if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
5469     return None;
5470 
5471   MachineOperand &LHS = PtrAdd->getOperand(1);
5472   MachineOperand &RHS = PtrAdd->getOperand(2);
5473   MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
5474 
5475   // The first case is the same as selectAddrModeXRO, except we need an extend.
5476   // In this case, we try to find a shift and extend, and fold them into the
5477   // addressing mode.
5478   //
5479   // E.g.
5480   //
5481   // off_reg = G_Z/S/ANYEXT ext_reg
5482   // val = G_CONSTANT LegalShiftVal
5483   // shift = G_SHL off_reg val
5484   // ptr = G_PTR_ADD base_reg shift
5485   // x = G_LOAD ptr
5486   //
5487   // In this case we can get a load like this:
5488   //
5489   // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
5490   auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
5491                                        SizeInBytes, /*WantsExt=*/true);
5492   if (ExtendedShl)
5493     return ExtendedShl;
5494 
  // There was no shift. We can try to fold a G_Z/S/ANYEXT in alone though.
5496   //
5497   // e.g.
5498   // ldr something, [base_reg, ext_reg, sxtw]
5499   if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
5500     return None;
5501 
5502   // Check if this is an extend. We'll get an extend type if it is.
5503   AArch64_AM::ShiftExtendType Ext =
5504       getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
5505   if (Ext == AArch64_AM::InvalidShiftExtend)
5506     return None;
5507 
5508   // Need a 32-bit wide register.
5509   MachineIRBuilder MIB(*PtrAdd);
5510   Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
5511                                        AArch64::GPR32RegClass, MIB);
5512   unsigned SignExtend = Ext == AArch64_AM::SXTW;
5513 
5514   // Base is LHS, offset is ExtReg.
5515   return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
5516            [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
5517            [=](MachineInstrBuilder &MIB) {
5518              MIB.addImm(SignExtend);
5519              MIB.addImm(0);
5520            }}};
5521 }
5522 
5523 /// Select a "register plus unscaled signed 9-bit immediate" address.  This
5524 /// should only match when there is an offset that is not valid for a scaled
5525 /// immediate addressing mode.  The "Size" argument is the size in bytes of the
5526 /// memory reference, which is needed here to know what is valid for a scaled
5527 /// immediate.
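/// For example, a 4-byte access at offset 3 cannot use the scaled form (the
/// offset is not a multiple of 4), but it fits in [-256, 255] and so can be
/// selected as an unscaled access such as "ldur w0, [x1, #3]".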
5528 InstructionSelector::ComplexRendererFns
5529 AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
5530                                                    unsigned Size) const {
5531   MachineRegisterInfo &MRI =
5532       Root.getParent()->getParent()->getParent()->getRegInfo();
5533 
5534   if (!Root.isReg())
5535     return None;
5536 
5537   if (!isBaseWithConstantOffset(Root, MRI))
5538     return None;
5539 
5540   MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
5541   if (!RootDef)
5542     return None;
5543 
5544   MachineOperand &OffImm = RootDef->getOperand(2);
5545   if (!OffImm.isReg())
5546     return None;
5547   MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
5548   if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
5549     return None;
5550   int64_t RHSC;
5551   MachineOperand &RHSOp1 = RHS->getOperand(1);
5552   if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
5553     return None;
5554   RHSC = RHSOp1.getCImm()->getSExtValue();
5555 
5556   // If the offset is valid as a scaled immediate, don't match here.
5557   if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
5558     return None;
5559   if (RHSC >= -256 && RHSC < 256) {
5560     MachineOperand &Base = RootDef->getOperand(1);
5561     return {{
5562         [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
5563         [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
5564     }};
5565   }
5566   return None;
5567 }
5568 
5569 InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::tryFoldAddLowIntoImm(
    MachineInstr &RootDef, unsigned Size, MachineRegisterInfo &MRI) const {
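  // Illustrative shape of the pattern this folds (small code model):
  //
  //   %g = ADRP @sym
  //   %addr = G_ADD_LOW %g, @sym
  //   %val = G_LOAD %addr
  //
  // The G_ADD_LOW is folded into the load's immediate operand, giving e.g.
  //
  //   ldr x0, [x8, :lo12:sym]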
5573   if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
5574     return None;
5575   MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
5576   if (Adrp.getOpcode() != AArch64::ADRP)
5577     return None;
5578 
5579   // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
  // TODO: Need to check GV's offset % size if doing offset folding into
  // globals.
5581   assert(Adrp.getOperand(1).getOffset() == 0 && "Unexpected offset in global");
5582   auto GV = Adrp.getOperand(1).getGlobal();
5583   if (GV->isThreadLocal())
5584     return None;
5585 
5586   auto &MF = *RootDef.getParent()->getParent();
5587   if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
5588     return None;
5589 
5590   unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
5591   MachineIRBuilder MIRBuilder(RootDef);
5592   Register AdrpReg = Adrp.getOperand(0).getReg();
5593   return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
5594            [=](MachineInstrBuilder &MIB) {
5595              MIB.addGlobalAddress(GV, /* Offset */ 0,
5596                                   OpFlags | AArch64II::MO_PAGEOFF |
5597                                       AArch64II::MO_NC);
5598            }}};
5599 }
5600 
5601 /// Select a "register plus scaled unsigned 12-bit immediate" address.  The
5602 /// "Size" argument is the size in bytes of the memory reference, which
5603 /// determines the scale.
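/// For example, with Size == 4 the encoded immediate is the byte offset
/// divided by 4 and must lie in [0, 4095], so offsets 0..16380 (multiples of
/// 4) can be selected directly, e.g. "ldr w0, [x1, #16380]".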
5604 InstructionSelector::ComplexRendererFns
5605 AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
5606                                                   unsigned Size) const {
5607   MachineFunction &MF = *Root.getParent()->getParent()->getParent();
5608   MachineRegisterInfo &MRI = MF.getRegInfo();
5609 
5610   if (!Root.isReg())
5611     return None;
5612 
5613   MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
5614   if (!RootDef)
5615     return None;
5616 
5617   if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
5618     return {{
5619         [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
5620         [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
5621     }};
5622   }
5623 
5624   CodeModel::Model CM = MF.getTarget().getCodeModel();
5625   // Check if we can fold in the ADD of small code model ADRP + ADD address.
5626   if (CM == CodeModel::Small) {
5627     auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
5628     if (OpFns)
5629       return OpFns;
5630   }
5631 
5632   if (isBaseWithConstantOffset(Root, MRI)) {
5633     MachineOperand &LHS = RootDef->getOperand(1);
5634     MachineOperand &RHS = RootDef->getOperand(2);
5635     MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
5636     MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
5637     if (LHSDef && RHSDef) {
5638       int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
5639       unsigned Scale = Log2_32(Size);
5640       if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
5641         if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
5642           return {{
5643               [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
5644               [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
5645           }};
5646 
5647         return {{
5648             [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
5649             [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
5650         }};
5651       }
5652     }
5653   }
5654 
5655   // Before falling back to our general case, check if the unscaled
5656   // instructions can handle this. If so, that's preferable.
5657   if (selectAddrModeUnscaled(Root, Size).hasValue())
5658     return None;
5659 
5660   return {{
5661       [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
5662       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
5663   }};
5664 }
5665 
5666 /// Given a shift instruction, return the correct shift type for that
5667 /// instruction.
5668 static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
5669   // TODO: Handle AArch64_AM::ROR
5670   switch (MI.getOpcode()) {
5671   default:
5672     return AArch64_AM::InvalidShiftExtend;
5673   case TargetOpcode::G_SHL:
5674     return AArch64_AM::LSL;
5675   case TargetOpcode::G_LSHR:
5676     return AArch64_AM::LSR;
5677   case TargetOpcode::G_ASHR:
5678     return AArch64_AM::ASR;
5679   }
5680 }
5681 
5682 /// Select a "shifted register" operand. If the value is not shifted, set the
5683 /// shift operand to a default value of "lsl 0".
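/// For example (illustrative), if the operand is defined by a G_SHL with a
/// constant shift of 3, the shift can be folded so the user selects to
/// something like "add x0, x1, x2, lsl #3".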
5684 ///
5685 /// TODO: Allow shifted register to be rotated in logical instructions.
5686 InstructionSelector::ComplexRendererFns
5687 AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const {
5688   if (!Root.isReg())
5689     return None;
5690   MachineRegisterInfo &MRI =
5691       Root.getParent()->getParent()->getParent()->getRegInfo();
5692 
5693   // Check if the operand is defined by an instruction which corresponds to
5694   // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
5695   //
5696   // TODO: Handle AArch64_AM::ROR for logical instructions.
5697   MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
5698   if (!ShiftInst)
5699     return None;
5700   AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
5701   if (ShType == AArch64_AM::InvalidShiftExtend)
5702     return None;
5703   if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
5704     return None;
5705 
5706   // Need an immediate on the RHS.
5707   MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
5708   auto Immed = getImmedFromMO(ShiftRHS);
5709   if (!Immed)
5710     return None;
5711 
5712   // We have something that we can fold. Fold in the shift's LHS and RHS into
5713   // the instruction.
5714   MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
5715   Register ShiftReg = ShiftLHS.getReg();
5716 
5717   unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
5718   unsigned Val = *Immed & (NumBits - 1);
5719   unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);
5720 
5721   return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
5722            [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
5723 }
5724 
5725 AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
5726     MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
5727   unsigned Opc = MI.getOpcode();
5728 
5729   // Handle explicit extend instructions first.
5730   if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
5731     unsigned Size;
5732     if (Opc == TargetOpcode::G_SEXT)
5733       Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
5734     else
5735       Size = MI.getOperand(2).getImm();
5736     assert(Size != 64 && "Extend from 64 bits?");
5737     switch (Size) {
5738     case 8:
5739       return AArch64_AM::SXTB;
5740     case 16:
5741       return AArch64_AM::SXTH;
5742     case 32:
5743       return AArch64_AM::SXTW;
5744     default:
5745       return AArch64_AM::InvalidShiftExtend;
5746     }
5747   }
5748 
5749   if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
5750     unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
5751     assert(Size != 64 && "Extend from 64 bits?");
5752     switch (Size) {
5753     case 8:
5754       return AArch64_AM::UXTB;
5755     case 16:
5756       return AArch64_AM::UXTH;
5757     case 32:
5758       return AArch64_AM::UXTW;
5759     default:
5760       return AArch64_AM::InvalidShiftExtend;
5761     }
5762   }
5763 
5764   // Don't have an explicit extend. Try to handle a G_AND with a constant mask
5765   // on the RHS.
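  // E.g. a G_AND with mask 0xffff behaves like a zero-extend from 16 bits and
  // maps to UXTH for arithmetic operands. Load/store addressing only supports
  // the 32-bit extends, so only the 0xffffffff mask is accepted there.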
5766   if (Opc != TargetOpcode::G_AND)
5767     return AArch64_AM::InvalidShiftExtend;
5768 
5769   Optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
5770   if (!MaybeAndMask)
5771     return AArch64_AM::InvalidShiftExtend;
5772   uint64_t AndMask = *MaybeAndMask;
5773   switch (AndMask) {
5774   default:
5775     return AArch64_AM::InvalidShiftExtend;
5776   case 0xFF:
5777     return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
5778   case 0xFFFF:
5779     return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
5780   case 0xFFFFFFFF:
5781     return AArch64_AM::UXTW;
5782   }
5783 }
5784 
5785 Register AArch64InstructionSelector::moveScalarRegClass(
5786     Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
5787   MachineRegisterInfo &MRI = *MIB.getMRI();
5788   auto Ty = MRI.getType(Reg);
5789   assert(!Ty.isVector() && "Expected scalars only!");
5790   if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
5791     return Reg;
5792 
5793   // Create a copy and immediately select it.
5794   // FIXME: We should have an emitCopy function?
5795   auto Copy = MIB.buildCopy({&RC}, {Reg});
5796   selectCopy(*Copy, TII, MRI, TRI, RBI);
5797   return Copy.getReg(0);
5798 }
5799 
5800 /// Select an "extended register" operand. This operand folds in an extend
5801 /// followed by an optional left shift.
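/// For example (illustrative), a G_SHL of a G_SEXT from s16 by 2 can fold
/// into an arithmetic operand such as "add x0, x1, w2, sxth #2"; shifts of
/// more than 4 cannot be folded this way.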
5802 InstructionSelector::ComplexRendererFns
5803 AArch64InstructionSelector::selectArithExtendedRegister(
5804     MachineOperand &Root) const {
5805   if (!Root.isReg())
5806     return None;
5807   MachineRegisterInfo &MRI =
5808       Root.getParent()->getParent()->getParent()->getRegInfo();
5809 
5810   uint64_t ShiftVal = 0;
5811   Register ExtReg;
5812   AArch64_AM::ShiftExtendType Ext;
5813   MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
5814   if (!RootDef)
5815     return None;
5816 
5817   if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI))
5818     return None;
5819 
5820   // Check if we can fold a shift and an extend.
5821   if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
5822     // Look for a constant on the RHS of the shift.
5823     MachineOperand &RHS = RootDef->getOperand(2);
5824     Optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
5825     if (!MaybeShiftVal)
5826       return None;
5827     ShiftVal = *MaybeShiftVal;
5828     if (ShiftVal > 4)
5829       return None;
5830     // Look for a valid extend instruction on the LHS of the shift.
5831     MachineOperand &LHS = RootDef->getOperand(1);
5832     MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
5833     if (!ExtDef)
5834       return None;
5835     Ext = getExtendTypeForInst(*ExtDef, MRI);
5836     if (Ext == AArch64_AM::InvalidShiftExtend)
5837       return None;
5838     ExtReg = ExtDef->getOperand(1).getReg();
5839   } else {
5840     // Didn't get a shift. Try just folding an extend.
5841     Ext = getExtendTypeForInst(*RootDef, MRI);
5842     if (Ext == AArch64_AM::InvalidShiftExtend)
5843       return None;
5844     ExtReg = RootDef->getOperand(1).getReg();
5845 
    // If we have a 32-bit instruction which zeroes out the high half of a
5847     // register, we get an implicit zero extend for free. Check if we have one.
5848     // FIXME: We actually emit the extend right now even though we don't have
5849     // to.
5850     if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
5851       MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
5852       if (ExtInst && isDef32(*ExtInst))
5853         return None;
5854     }
5855   }
5856 
5857   // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
5858   // copy.
5859   MachineIRBuilder MIB(*RootDef);
5860   ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);
5861 
5862   return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
5863            [=](MachineInstrBuilder &MIB) {
5864              MIB.addImm(getArithExtendImm(Ext, ShiftVal));
5865            }}};
5866 }
5867 
5868 void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
5869                                                 const MachineInstr &MI,
5870                                                 int OpIdx) const {
5871   const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
5872   assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
5873          "Expected G_CONSTANT");
5874   Optional<int64_t> CstVal =
5875       getConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
5876   assert(CstVal && "Expected constant value");
5877   MIB.addImm(CstVal.getValue());
5878 }
5879 
5880 void AArch64InstructionSelector::renderLogicalImm32(
5881   MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
5882   assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
5883          "Expected G_CONSTANT");
5884   uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
5885   uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
5886   MIB.addImm(Enc);
5887 }
5888 
5889 void AArch64InstructionSelector::renderLogicalImm64(
5890   MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
5891   assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
5892          "Expected G_CONSTANT");
5893   uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
5894   uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
5895   MIB.addImm(Enc);
5896 }
5897 
5898 bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
5899     const MachineInstr &MI, unsigned NumBytes) const {
5900   if (!MI.mayLoadOrStore())
5901     return false;
5902   assert(MI.hasOneMemOperand() &&
5903          "Expected load/store to have only one mem op!");
5904   return (*MI.memoperands_begin())->getSize() == NumBytes;
5905 }
5906 
5907 bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
5908   const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
5909   if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
5910     return false;
5911 
5912   // Only return true if we know the operation will zero-out the high half of
5913   // the 64-bit register. Truncates can be subregister copies, which don't
5914   // zero out the high bits. Copies and other copy-like instructions can be
5915   // fed by truncates, or could be lowered as subregister copies.
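  // E.g. a 32-bit G_ADD selects to an instruction writing a W register, which
  // implicitly zeroes bits [63:32], whereas a G_TRUNC may become a plain
  // subregister copy that leaves those bits alone.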
5916   switch (MI.getOpcode()) {
5917   default:
5918     return true;
5919   case TargetOpcode::COPY:
5920   case TargetOpcode::G_BITCAST:
5921   case TargetOpcode::G_TRUNC:
5922   case TargetOpcode::G_PHI:
5923     return false;
5924   }
5925 }
5926 
5927 
5928 // Perform fixups on the given PHI instruction's operands to force them all
5929 // to be the same as the destination regbank.
5930 static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
5931                             const AArch64RegisterBankInfo &RBI) {
5932   assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
5933   Register DstReg = MI.getOperand(0).getReg();
5934   const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
5935   assert(DstRB && "Expected PHI dst to have regbank assigned");
5936   MachineIRBuilder MIB(MI);
5937 
5938   // Go through each operand and ensure it has the same regbank.
5939   for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
5940     MachineOperand &MO = MI.getOperand(OpIdx);
5941     if (!MO.isReg())
5942       continue;
5943     Register OpReg = MO.getReg();
5944     const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
5945     if (RB != DstRB) {
5946       // Insert a cross-bank copy.
5947       auto *OpDef = MRI.getVRegDef(OpReg);
5948       const LLT &Ty = MRI.getType(OpReg);
5949       MIB.setInsertPt(*OpDef->getParent(), std::next(OpDef->getIterator()));
5950       auto Copy = MIB.buildCopy(Ty, OpReg);
5951       MRI.setRegBank(Copy.getReg(0), *DstRB);
5952       MO.setReg(Copy.getReg(0));
5953     }
5954   }
5955 }
5956 
5957 void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
  // We're looking for PHIs; build a list so we don't invalidate iterators.
5959   MachineRegisterInfo &MRI = MF.getRegInfo();
5960   SmallVector<MachineInstr *, 32> Phis;
5961   for (auto &BB : MF) {
5962     for (auto &MI : BB) {
5963       if (MI.getOpcode() == TargetOpcode::G_PHI)
5964         Phis.emplace_back(&MI);
5965     }
5966   }
5967 
5968   for (auto *MI : Phis) {
    // We need to do some work here if the operand types are smaller than
    // 32 bits and they are split across fpr/gpr banks. Since all types < 32b
    // on gpr end up being assigned gpr32 regclasses, we can end up with PHIs
    // here which try to select between a gpr32 and an fpr16. Ideally RBS
    // shouldn't be selecting heterogeneous regbanks for operands if possible,
    // but we still need to be able to deal with it here.
5975     //
5976     // To fix this, if we have a gpr-bank operand < 32b in size and at least
5977     // one other operand is on the fpr bank, then we add cross-bank copies
5978     // to homogenize the operand banks. For simplicity the bank that we choose
5979     // to settle on is whatever bank the def operand has. For example:
5980     //
5981     // %endbb:
5982     //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
5983     //  =>
5984     // %bb2:
5985     //   ...
5986     //   %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
5987     //   ...
5988     // %endbb:
5989     //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
5990     bool HasGPROp = false, HasFPROp = false;
5991     for (unsigned OpIdx = 1; OpIdx < MI->getNumOperands(); ++OpIdx) {
5992       const auto &MO = MI->getOperand(OpIdx);
5993       if (!MO.isReg())
5994         continue;
5995       const LLT &Ty = MRI.getType(MO.getReg());
5996       if (!Ty.isValid() || !Ty.isScalar())
5997         break;
5998       if (Ty.getSizeInBits() >= 32)
5999         break;
6000       const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
      // If for some reason we don't have a regbank yet, don't try anything.
6002       if (!RB)
6003         break;
6004 
6005       if (RB->getID() == AArch64::GPRRegBankID)
6006         HasGPROp = true;
6007       else
6008         HasFPROp = true;
6009     }
    // We have heterogeneous regbanks, so we need to fix them up.
6011     if (HasGPROp && HasFPROp)
6012       fixupPHIOpBanks(*MI, MRI, RBI);
6013   }
6014 }
6015 
6016 namespace llvm {
6017 InstructionSelector *
6018 createAArch64InstructionSelector(const AArch64TargetMachine &TM,
6019                                  AArch64Subtarget &Subtarget,
6020                                  AArch64RegisterBankInfo &RBI) {
6021   return new AArch64InstructionSelector(TM, Subtarget, RBI);
6022 }
6023 }
6024