1 //===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines an instruction selector for the AArch64 target.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AArch64MachineFunctionInfo.h"
14 #include "AArch64TargetMachine.h"
15 #include "MCTargetDesc/AArch64AddressingModes.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/CodeGen/ISDOpcodes.h"
18 #include "llvm/CodeGen/SelectionDAGISel.h"
19 #include "llvm/IR/Function.h" // To access function attributes.
20 #include "llvm/IR/GlobalValue.h"
21 #include "llvm/IR/Intrinsics.h"
22 #include "llvm/IR/IntrinsicsAArch64.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/ErrorHandling.h"
25 #include "llvm/Support/KnownBits.h"
26 #include "llvm/Support/MathExtras.h"
27 #include "llvm/Support/raw_ostream.h"
28 
29 using namespace llvm;
30 
31 #define DEBUG_TYPE "aarch64-isel"
32 #define PASS_NAME "AArch64 Instruction Selection"
33 
34 //===--------------------------------------------------------------------===//
35 /// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
36 /// instructions for SelectionDAG operations.
37 ///
38 namespace {
39 
40 class AArch64DAGToDAGISel : public SelectionDAGISel {
41 
42   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
43   /// make the right decision when generating code for different targets.
44   const AArch64Subtarget *Subtarget;
45 
46 public:
47   static char ID;
48 
49   AArch64DAGToDAGISel() = delete;
50 
51   explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
52                                CodeGenOpt::Level OptLevel)
53       : SelectionDAGISel(ID, tm, OptLevel), Subtarget(nullptr) {}
54 
55   bool runOnMachineFunction(MachineFunction &MF) override {
56     Subtarget = &MF.getSubtarget<AArch64Subtarget>();
57     return SelectionDAGISel::runOnMachineFunction(MF);
58   }
59 
60   void Select(SDNode *Node) override;
61 
62   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
63   /// inline asm expressions.
64   bool SelectInlineAsmMemoryOperand(const SDValue &Op,
65                                     unsigned ConstraintID,
66                                     std::vector<SDValue> &OutOps) override;
67 
68   template <signed Low, signed High, signed Scale>
69   bool SelectRDVLImm(SDValue N, SDValue &Imm);
70 
71   bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
72   bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
73   bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
74   bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
75   bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
76     return SelectShiftedRegister(N, false, Reg, Shift);
77   }
78   bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
79     return SelectShiftedRegister(N, true, Reg, Shift);
80   }
81   bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
82     return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
83   }
84   bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
85     return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
86   }
87   bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
88     return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
89   }
90   bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
91     return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
92   }
93   bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
94     return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
95   }
96   bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
97     return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
98   }
99   bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
100     return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
101   }
102   bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
103     return SelectAddrModeIndexed(N, 1, Base, OffImm);
104   }
105   bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
106     return SelectAddrModeIndexed(N, 2, Base, OffImm);
107   }
108   bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
109     return SelectAddrModeIndexed(N, 4, Base, OffImm);
110   }
111   bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
112     return SelectAddrModeIndexed(N, 8, Base, OffImm);
113   }
114   bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
115     return SelectAddrModeIndexed(N, 16, Base, OffImm);
116   }
117   bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
118     return SelectAddrModeUnscaled(N, 1, Base, OffImm);
119   }
120   bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
121     return SelectAddrModeUnscaled(N, 2, Base, OffImm);
122   }
123   bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
124     return SelectAddrModeUnscaled(N, 4, Base, OffImm);
125   }
126   bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
127     return SelectAddrModeUnscaled(N, 8, Base, OffImm);
128   }
129   bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
130     return SelectAddrModeUnscaled(N, 16, Base, OffImm);
131   }
132   template <unsigned Size, unsigned Max>
133   bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
134     // Test if there is an appropriate addressing mode and check if the
135     // immediate fits.
136     bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
137     if (Found) {
138       if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
139         int64_t C = CI->getSExtValue();
140         if (C <= Max)
141           return true;
142       }
143     }
144 
145     // Otherwise, base only, materialize address in register.
146     Base = N;
147     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
148     return true;
149   }
150 
151   template<int Width>
152   bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
153                          SDValue &SignExtend, SDValue &DoShift) {
154     return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
155   }
156 
157   template<int Width>
158   bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
159                          SDValue &SignExtend, SDValue &DoShift) {
160     return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
161   }
162 
163   bool SelectExtractHigh(SDValue N, SDValue &Res) {
164     if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
165       N = N->getOperand(0);
166     if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
167         !isa<ConstantSDNode>(N->getOperand(1)))
168       return false;
169     EVT VT = N->getValueType(0);
170     EVT LVT = N->getOperand(0).getValueType();
171     unsigned Index = N->getConstantOperandVal(1);
172     if (!VT.is64BitVector() || !LVT.is128BitVector() ||
173         Index != VT.getVectorNumElements())
174       return false;
175     Res = N->getOperand(0);
176     return true;
177   }
178 
179   bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
180     if (N.getOpcode() != AArch64ISD::VLSHR)
181       return false;
182     SDValue Op = N->getOperand(0);
183     EVT VT = Op.getValueType();
184     unsigned ShtAmt = N->getConstantOperandVal(1);
185     if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
186       return false;
187 
188     APInt Imm;
189     if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
190       Imm = APInt(VT.getScalarSizeInBits(),
191                   Op.getOperand(1).getConstantOperandVal(0)
192                       << Op.getOperand(1).getConstantOperandVal(1));
193     else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
194              isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
195       Imm = APInt(VT.getScalarSizeInBits(),
196                   Op.getOperand(1).getConstantOperandVal(0));
197     else
198       return false;
199 
200     if (Imm != 1ULL << (ShtAmt - 1))
201       return false;
202 
203     Res1 = Op.getOperand(0);
204     Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
205     return true;
206   }
207 
208   bool SelectDupZeroOrUndef(SDValue N) {
209     switch(N->getOpcode()) {
210     case ISD::UNDEF:
211       return true;
212     case AArch64ISD::DUP:
213     case ISD::SPLAT_VECTOR: {
214       auto Opnd0 = N->getOperand(0);
215       if (isNullConstant(Opnd0))
216         return true;
217       if (isNullFPConstant(Opnd0))
218         return true;
219       break;
220     }
221     default:
222       break;
223     }
224 
225     return false;
226   }
227 
228   bool SelectDupZero(SDValue N) {
229     switch(N->getOpcode()) {
230     case AArch64ISD::DUP:
231     case ISD::SPLAT_VECTOR: {
232       auto Opnd0 = N->getOperand(0);
233       if (isNullConstant(Opnd0))
234         return true;
235       if (isNullFPConstant(Opnd0))
236         return true;
237       break;
238     }
239     }
240 
241     return false;
242   }
243 
244   bool SelectDupNegativeZero(SDValue N) {
245     switch(N->getOpcode()) {
246     case AArch64ISD::DUP:
247     case ISD::SPLAT_VECTOR: {
248       ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(N->getOperand(0));
249       return Const && Const->isZero() && Const->isNegative();
250     }
251     }
252 
253     return false;
254   }
255 
256   template<MVT::SimpleValueType VT>
257   bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
258     return SelectSVEAddSubImm(N, VT, Imm, Shift);
259   }
260 
261   template <MVT::SimpleValueType VT>
262   bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
263     return SelectSVECpyDupImm(N, VT, Imm, Shift);
264   }
265 
266   template <MVT::SimpleValueType VT, bool Invert = false>
267   bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
268     return SelectSVELogicalImm(N, VT, Imm, Invert);
269   }
270 
271   template <MVT::SimpleValueType VT>
272   bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
273     return SelectSVEArithImm(N, VT, Imm);
274   }
275 
276   template <unsigned Low, unsigned High, bool AllowSaturation = false>
277   bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
278     return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
279   }
280 
281   bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
282     if (N->getOpcode() != ISD::SPLAT_VECTOR)
283       return false;
284 
285     EVT EltVT = N->getValueType(0).getVectorElementType();
286     return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
287                              /* High */ EltVT.getFixedSizeInBits(),
288                              /* AllowSaturation */ true, Imm);
289   }
290 
291   // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
292   template<signed Min, signed Max, signed Scale, bool Shift>
293   bool SelectCntImm(SDValue N, SDValue &Imm) {
294     if (!isa<ConstantSDNode>(N))
295       return false;
296 
297     int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
298     if (Shift)
299       MulImm = 1LL << MulImm;
300 
301     if ((MulImm % std::abs(Scale)) != 0)
302       return false;
303 
304     MulImm /= Scale;
305     if ((MulImm >= Min) && (MulImm <= Max)) {
306       Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
307       return true;
308     }
309 
310     return false;
311   }
312 
313   template <signed Max, signed Scale>
314   bool SelectEXTImm(SDValue N, SDValue &Imm) {
315     if (!isa<ConstantSDNode>(N))
316       return false;
317 
318     int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
319 
320     if (MulImm >= 0 && MulImm <= Max) {
321       MulImm *= Scale;
322       Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
323       return true;
324     }
325 
326     return false;
327   }
328 
329   template <unsigned BaseReg> bool ImmToTile(SDValue N, SDValue &Imm) {
330     if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
331       uint64_t C = CI->getZExtValue();
332       Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
333       return true;
334     }
335     return false;
336   }
337 
338   /// Form sequences of consecutive 64/128-bit registers for use in NEON
339   /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
340   /// between 1 and 4 elements. If it contains a single element, that element
341   /// is returned unchanged; otherwise a REG_SEQUENCE value is returned.
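  /// For example (illustrative), createQTuple({V0, V1}) yields a REG_SEQUENCE
  /// in the QQ register class with V0 in qsub0 and V1 in qsub1.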
342   SDValue createDTuple(ArrayRef<SDValue> Vecs);
343   SDValue createQTuple(ArrayRef<SDValue> Vecs);
344   // Form a sequence of SVE registers for instructions using a list of
345   // vectors, e.g. structured loads and stores (ldN, stN).
346   SDValue createZTuple(ArrayRef<SDValue> Vecs);
347 
348   // Similar to above, except the register must start at a multiple of the
349   // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
350   SDValue createZMulTuple(ArrayRef<SDValue> Regs);
351 
352   /// Generic helper for the createDTuple/createQTuple
353   /// functions. Those should almost always be called instead.
354   SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
355                       const unsigned SubRegs[]);
356 
357   void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
358 
359   bool tryIndexedLoad(SDNode *N);
360 
361   bool trySelectStackSlotTagP(SDNode *N);
362   void SelectTagP(SDNode *N);
363 
364   void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
365                      unsigned SubRegIdx);
366   void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
367                          unsigned SubRegIdx);
368   void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
369   void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
370   void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
371                             unsigned Opc_rr, unsigned Opc_ri,
372                             bool IsIntr = false);
373   void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
374                                        unsigned Scale, unsigned Opc_ri,
375                                        unsigned Opc_rr);
376   void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
377                                        bool IsZmMulti, unsigned Opcode,
378                                        bool HasPred = false);
379   void SelectPExtPair(SDNode *N, unsigned Opc);
380   void SelectWhilePair(SDNode *N, unsigned Opc);
381   void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
382   void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
383   void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
384                                  bool IsTupleInput, unsigned Opc);
385   void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);
386 
387   template <unsigned MaxIdx, unsigned Scale>
388   void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
389                              unsigned Op);
390 
391   bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
392   /// SVE Reg+Imm addressing mode.
393   template <int64_t Min, int64_t Max>
394   bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
395                                 SDValue &OffImm);
396   /// SVE Reg+Reg address mode.
397   template <unsigned Scale>
398   bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
399     return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
400   }
401 
402   template <unsigned MaxIdx, unsigned Scale>
403   bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
404     return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
405   }
406 
407   void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
408   void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
409   void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
410   void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
411   void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
412                              unsigned Opc_rr, unsigned Opc_ri);
413   std::tuple<unsigned, SDValue, SDValue>
414   findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
415                            const SDValue &OldBase, const SDValue &OldOffset,
416                            unsigned Scale);
417 
418   bool tryBitfieldExtractOp(SDNode *N);
419   bool tryBitfieldExtractOpFromSExt(SDNode *N);
420   bool tryBitfieldInsertOp(SDNode *N);
421   bool tryBitfieldInsertInZeroOp(SDNode *N);
422   bool tryShiftAmountMod(SDNode *N);
423 
424   bool tryReadRegister(SDNode *N);
425   bool tryWriteRegister(SDNode *N);
426 
427   bool trySelectCastFixedLengthToScalableVector(SDNode *N);
428   bool trySelectCastScalableToFixedLengthVector(SDNode *N);
429 
430 // Include the pieces autogenerated from the target description.
431 #include "AArch64GenDAGISel.inc"
432 
433 private:
434   bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
435                              SDValue &Shift);
436   bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
437   bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
438                                SDValue &OffImm) {
439     return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
440   }
441   bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
442                                      unsigned Size, SDValue &Base,
443                                      SDValue &OffImm);
444   bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
445                              SDValue &OffImm);
446   bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
447                               SDValue &OffImm);
448   bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
449                          SDValue &Offset, SDValue &SignExtend,
450                          SDValue &DoShift);
451   bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
452                          SDValue &Offset, SDValue &SignExtend,
453                          SDValue &DoShift);
454   bool isWorthFolding(SDValue V) const;
455   bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
456                          SDValue &Offset, SDValue &SignExtend);
457 
458   template<unsigned RegWidth>
459   bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
460     return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
461   }
462 
463   bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
464 
465   bool SelectCMP_SWAP(SDNode *N);
466 
467   bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
468   bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
469   bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
470 
471   bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
472   bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
473                          bool AllowSaturation, SDValue &Imm);
474 
475   bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
476   bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
477                                SDValue &Offset);
478   bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
479                           SDValue &Offset, unsigned Scale = 1);
480 
481   bool SelectAllActivePredicate(SDValue N);
482   bool SelectAnyPredicate(SDValue N);
483 };
484 } // end anonymous namespace
485 
486 char AArch64DAGToDAGISel::ID = 0;
487 
488 INITIALIZE_PASS(AArch64DAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)
489 
490 /// isIntImmediate - This method tests to see if the node is a constant
491 /// operand. If so, Imm will receive the value.
492 static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
493   if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
494     Imm = C->getZExtValue();
495     return true;
496   }
497   return false;
498 }
499 
500 // isIntImmediate - This method tests to see if N is a constant operand.
501 // If so, Imm will receive the value.
502 static bool isIntImmediate(SDValue N, uint64_t &Imm) {
503   return isIntImmediate(N.getNode(), Imm);
504 }
505 
506 // isOpcWithIntImmediate - This method tests to see if the node is a specific
507 // opcode and that it has an immediate integer right operand.
508 // If so, Imm will receive the value.
509 static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
510                                   uint64_t &Imm) {
511   return N->getOpcode() == Opc &&
512          isIntImmediate(N->getOperand(1).getNode(), Imm);
513 }
514 
515 // isIntImmediateEq - This method tests to see if N is a constant operand that
516 // is equivalent to 'ImmExpected'.
517 #ifndef NDEBUG
518 static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
519   uint64_t Imm;
520   if (!isIntImmediate(N.getNode(), Imm))
521     return false;
522   return Imm == ImmExpected;
523 }
524 #endif
525 
526 bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
527     const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
528   switch(ConstraintID) {
529   default:
530     llvm_unreachable("Unexpected asm memory constraint");
531   case InlineAsm::Constraint_m:
532   case InlineAsm::Constraint_o:
533   case InlineAsm::Constraint_Q:
534     // We need to make sure that this one operand does not end up in XZR, thus
535     // require the address to be in a PointerRegClass register.
536     const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
537     const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
538     SDLoc dl(Op);
539     SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
540     SDValue NewOp =
541         SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
542                                        dl, Op.getValueType(),
543                                        Op, RC), 0);
544     OutOps.push_back(NewOp);
545     return false;
546   }
547   return true;
548 }
549 
550 /// SelectArithImmed - Select an immediate value that can be represented as
551 /// a 12-bit value shifted left by either 0 or 12.  If so, return true with
552 /// Val set to the 12-bit value and Shift set to the shifter operand.
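/// For example (illustrative values): #0xabc selects Val = 0xabc with LSL #0,
/// #0xabc000 selects Val = 0xabc with LSL #12, and #0xabc001 is rejected.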
553 bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
554                                            SDValue &Shift) {
555   // This function is called from the addsub_shifted_imm ComplexPattern,
556   // which lists [imm] as the list of opcodes it is interested in. However,
557   // we still need to check whether the operand is actually an immediate
558   // here because the ComplexPattern opcode list is only used in
559   // root-level opcode matching.
560   if (!isa<ConstantSDNode>(N.getNode()))
561     return false;
562 
563   uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
564   unsigned ShiftAmt;
565 
566   if (Immed >> 12 == 0) {
567     ShiftAmt = 0;
568   } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
569     ShiftAmt = 12;
570     Immed = Immed >> 12;
571   } else
572     return false;
573 
574   unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
575   SDLoc dl(N);
576   Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
577   Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
578   return true;
579 }
580 
581 /// SelectNegArithImmed - As above, but negates the value before trying to
582 /// select it.
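/// For example (illustrative): a 32-bit compare against -16 has its immediate
/// negated to 16, so it can be selected as CMN (ADDS) with #16 instead of CMP.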
583 bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
584                                               SDValue &Shift) {
585   // This function is called from the addsub_shifted_imm ComplexPattern,
586   // which lists [imm] as the list of opcodes it is interested in. However,
587   // we still need to check whether the operand is actually an immediate
588   // here because the ComplexPattern opcode list is only used in
589   // root-level opcode matching.
590   if (!isa<ConstantSDNode>(N.getNode()))
591     return false;
592 
593   // The immediate operand must be a 24-bit zero-extended immediate.
594   uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
595 
596   // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
597   // have the opposite effect on the C flag, so this pattern mustn't match under
598   // those circumstances.
599   if (Immed == 0)
600     return false;
601 
602   if (N.getValueType() == MVT::i32)
603     Immed = ~((uint32_t)Immed) + 1;
604   else
605     Immed = ~Immed + 1ULL;
606   if (Immed & 0xFFFFFFFFFF000000ULL)
607     return false;
608 
609   Immed &= 0xFFFFFFULL;
610   return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
611                           Shift);
612 }
613 
614 /// getShiftTypeForNode - Translate a shift node to the corresponding
615 /// ShiftType value.
616 static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
617   switch (N.getOpcode()) {
618   default:
619     return AArch64_AM::InvalidShiftExtend;
620   case ISD::SHL:
621     return AArch64_AM::LSL;
622   case ISD::SRL:
623     return AArch64_AM::LSR;
624   case ISD::SRA:
625     return AArch64_AM::ASR;
626   case ISD::ROTR:
627     return AArch64_AM::ROR;
628   }
629 }
630 
631 /// Determine whether it is worth it to fold SHL into the addressing
632 /// mode.
633 static bool isWorthFoldingSHL(SDValue V) {
634   assert(V.getOpcode() == ISD::SHL && "invalid opcode");
635   // It is worth folding logical shift of up to three places.
636   auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
637   if (!CSD)
638     return false;
639   unsigned ShiftVal = CSD->getZExtValue();
640   if (ShiftVal > 3)
641     return false;
642 
643   // Check if this particular node is reused in any non-memory related
644   // operation.  If yes, do not try to fold this node into the address
645   // computation, since the computation will be kept.
646   const SDNode *Node = V.getNode();
647   for (SDNode *UI : Node->uses())
648     if (!isa<MemSDNode>(*UI))
649       for (SDNode *UII : UI->uses())
650         if (!isa<MemSDNode>(*UII))
651           return false;
652   return true;
653 }
654 
655 /// Determine whether it is worth it to fold V into an extended register.
656 bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
657   // Trivial if we are optimizing for code size or if there is only
658   // one use of the value.
659   if (CurDAG->shouldOptForSize() || V.hasOneUse())
660     return true;
661   // If a subtarget has a fastpath LSL we can fold a logical shift into
662   // the addressing mode and save a cycle.
663   if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL &&
664       isWorthFoldingSHL(V))
665     return true;
666   if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) {
667     const SDValue LHS = V.getOperand(0);
668     const SDValue RHS = V.getOperand(1);
669     if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
670       return true;
671     if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
672       return true;
673   }
674 
675   // It hurts otherwise, since the value will be reused.
676   return false;
677 }
678 
679 /// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
680 /// to select more shifted-register operands.
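/// For example (i32, illustrative): (and (srl x, 2), 0xFFFFFFF0) is equivalent
/// to (shl (srl x, 6), 4), so it can be selected as a UBFM (LSR #6) whose
/// result is used as a shifted-register operand with LSL #4.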
681 bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
682                                                        SDValue &Shift) {
683   EVT VT = N.getValueType();
684   if (VT != MVT::i32 && VT != MVT::i64)
685     return false;
686 
687   if (N->getOpcode() != ISD::AND || !N->hasOneUse())
688     return false;
689   SDValue LHS = N.getOperand(0);
690   if (!LHS->hasOneUse())
691     return false;
692 
693   unsigned LHSOpcode = LHS->getOpcode();
694   if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
695     return false;
696 
697   ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
698   if (!ShiftAmtNode)
699     return false;
700 
701   uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
702   ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
703   if (!RHSC)
704     return false;
705 
706   APInt AndMask = RHSC->getAPIntValue();
707   unsigned LowZBits, MaskLen;
708   if (!AndMask.isShiftedMask(LowZBits, MaskLen))
709     return false;
710 
711   unsigned BitWidth = N.getValueSizeInBits();
712   SDLoc DL(LHS);
713   uint64_t NewShiftC;
714   unsigned NewShiftOp;
715   if (LHSOpcode == ISD::SHL) {
716     // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
717     // BitWidth != LowZBits + MaskLen doesn't match the pattern
718     if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
719       return false;
720 
721     NewShiftC = LowZBits - ShiftAmtC;
722     NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
723   } else {
724     if (LowZBits == 0)
725       return false;
726 
727     // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
728     NewShiftC = LowZBits + ShiftAmtC;
729     if (NewShiftC >= BitWidth)
730       return false;
731 
732     // SRA needs all of the high bits
733     if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
734       return false;
735 
736     // SRL high bits can be 0 or 1
737     if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
738       return false;
739 
740     if (LHSOpcode == ISD::SRL)
741       NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
742     else
743       NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
744   }
745 
746   assert(NewShiftC < BitWidth && "Invalid shift amount");
747   SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
748   SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
749   Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
750                                        NewShiftAmt, BitWidthMinus1),
751                 0);
752   unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
753   Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
754   return true;
755 }
756 
757 /// SelectShiftedRegister - Select a "shifted register" operand.  If the value
758 /// is not shifted, set the Shift operand to default of "LSL 0".  The logical
759 /// instructions allow the shifted register to be rotated, but the arithmetic
760 /// instructions do not.  The AllowROR parameter specifies whether ROR is
761 /// supported.
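/// For example (illustrative): for N = (shl x1, #3), Reg is set to x1 and
/// Shift encodes LSL #3, so an AND user can become 'and x0, x2, x1, lsl #3'.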
762 bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
763                                                 SDValue &Reg, SDValue &Shift) {
764   if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
765     return true;
766 
767   AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
768   if (ShType == AArch64_AM::InvalidShiftExtend)
769     return false;
770   if (!AllowROR && ShType == AArch64_AM::ROR)
771     return false;
772 
773   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
774     unsigned BitSize = N.getValueSizeInBits();
775     unsigned Val = RHS->getZExtValue() & (BitSize - 1);
776     unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
777 
778     Reg = N.getOperand(0);
779     Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
780     return isWorthFolding(N);
781   }
782 
783   return false;
784 }
785 
786 /// getExtendTypeForNode - Translate an extend node to the corresponding
787 /// ExtendType value.
788 static AArch64_AM::ShiftExtendType
789 getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
790   if (N.getOpcode() == ISD::SIGN_EXTEND ||
791       N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
792     EVT SrcVT;
793     if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
794       SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
795     else
796       SrcVT = N.getOperand(0).getValueType();
797 
798     if (!IsLoadStore && SrcVT == MVT::i8)
799       return AArch64_AM::SXTB;
800     else if (!IsLoadStore && SrcVT == MVT::i16)
801       return AArch64_AM::SXTH;
802     else if (SrcVT == MVT::i32)
803       return AArch64_AM::SXTW;
804     assert(SrcVT != MVT::i64 && "extend from 64-bits?");
805 
806     return AArch64_AM::InvalidShiftExtend;
807   } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
808              N.getOpcode() == ISD::ANY_EXTEND) {
809     EVT SrcVT = N.getOperand(0).getValueType();
810     if (!IsLoadStore && SrcVT == MVT::i8)
811       return AArch64_AM::UXTB;
812     else if (!IsLoadStore && SrcVT == MVT::i16)
813       return AArch64_AM::UXTH;
814     else if (SrcVT == MVT::i32)
815       return AArch64_AM::UXTW;
816     assert(SrcVT != MVT::i64 && "extend from 64-bits?");
817 
818     return AArch64_AM::InvalidShiftExtend;
819   } else if (N.getOpcode() == ISD::AND) {
820     ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
821     if (!CSD)
822       return AArch64_AM::InvalidShiftExtend;
823     uint64_t AndMask = CSD->getZExtValue();
824 
825     switch (AndMask) {
826     default:
827       return AArch64_AM::InvalidShiftExtend;
828     case 0xFF:
829       return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
830     case 0xFFFF:
831       return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
832     case 0xFFFFFFFF:
833       return AArch64_AM::UXTW;
834     }
835   }
836 
837   return AArch64_AM::InvalidShiftExtend;
838 }
839 
840 /// Instructions that accept extend modifiers like UXTW expect the register
841 /// being extended to be a GPR32, but the incoming DAG might be acting on a
842 /// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
843 /// this is the case.
844 static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
845   if (N.getValueType() == MVT::i32)
846     return N;
847 
848   SDLoc dl(N);
849   return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N);
850 }
851 
852 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
853 template<signed Low, signed High, signed Scale>
854 bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
855   if (!isa<ConstantSDNode>(N))
856     return false;
857 
858   int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
859   if ((MulImm % std::abs(Scale)) == 0) {
860     int64_t RDVLImm = MulImm / Scale;
861     if ((RDVLImm >= Low) && (RDVLImm <= High)) {
862       Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
863       return true;
864     }
865   }
866 
867   return false;
868 }
869 
870 /// SelectArithExtendedRegister - Select an "extended register" operand.  This
871 /// operand folds in an extend followed by an optional left shift.
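/// For example (illustrative): N = (shl (sext_inreg x2, i32), #2) selects
/// Reg = w2 and Shift = SXTW #2, matching e.g. 'add x0, x1, w2, sxtw #2'.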
872 bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
873                                                       SDValue &Shift) {
874   unsigned ShiftVal = 0;
875   AArch64_AM::ShiftExtendType Ext;
876 
877   if (N.getOpcode() == ISD::SHL) {
878     ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
879     if (!CSD)
880       return false;
881     ShiftVal = CSD->getZExtValue();
882     if (ShiftVal > 4)
883       return false;
884 
885     Ext = getExtendTypeForNode(N.getOperand(0));
886     if (Ext == AArch64_AM::InvalidShiftExtend)
887       return false;
888 
889     Reg = N.getOperand(0).getOperand(0);
890   } else {
891     Ext = getExtendTypeForNode(N);
892     if (Ext == AArch64_AM::InvalidShiftExtend)
893       return false;
894 
895     Reg = N.getOperand(0);
896 
897     // Don't match if a free 32-bit -> 64-bit zext can be used instead. Use
898     // isDef32 as a heuristic for when the operand is likely to be a 32-bit def.
899     auto isDef32 = [](SDValue N) {
900       unsigned Opc = N.getOpcode();
901       return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
902              Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
903              Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
904              Opc != ISD::FREEZE;
905     };
906     if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
907         isDef32(Reg))
908       return false;
909   }
910 
911   // AArch64 mandates that the RHS of the operation must use the smallest
912   // register class that could contain the size being extended from.  Thus,
913   // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
914   // there might not be an actual 32-bit value in the program.  We can
915   // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
916   assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
917   Reg = narrowIfNeeded(CurDAG, Reg);
918   Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
919                                     MVT::i32);
920   return isWorthFolding(N);
921 }
922 
923 /// SelectArithUXTXRegister - Select a "UXTX register" operand. This
924 /// operand is referred to by instructions that have an SP operand.
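/// For example (illustrative): N = (shl x2, #3) selects Reg = x2 and
/// Shift = UXTX #3 for an add/sub whose other operand is SP.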
925 bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
926                                                   SDValue &Shift) {
927   unsigned ShiftVal = 0;
928   AArch64_AM::ShiftExtendType Ext;
929 
930   if (N.getOpcode() != ISD::SHL)
931     return false;
932 
933   ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
934   if (!CSD)
935     return false;
936   ShiftVal = CSD->getZExtValue();
937   if (ShiftVal > 4)
938     return false;
939 
940   Ext = AArch64_AM::UXTX;
941   Reg = N.getOperand(0);
942   Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
943                                     MVT::i32);
944   return isWorthFolding(N);
945 }
946 
947 /// If there's a use of this ADDlow that's not itself a load/store then we'll
948 /// need to create a real ADD instruction from it anyway and there's no point in
949 /// folding it into the mem op. Theoretically, it shouldn't matter, but there's
950 /// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
951 /// leads to duplicated ADRP instructions.
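/// For example (illustrative): a load of (ADDlow (ADRP sym), :lo12:sym) keeps
/// the low part folded, giving 'adrp x8, sym' followed by
/// 'ldr x0, [x8, :lo12:sym]'.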
952 static bool isWorthFoldingADDlow(SDValue N) {
953   for (auto *Use : N->uses()) {
954     if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
955         Use->getOpcode() != ISD::ATOMIC_LOAD &&
956         Use->getOpcode() != ISD::ATOMIC_STORE)
957       return false;
958 
959     // ldar and stlr have much more restrictive addressing modes (just a
960     // register).
961     if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getSuccessOrdering()))
962       return false;
963   }
964 
965   return true;
966 }
967 
968 /// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
969 /// immediate" address.  The "Size" argument is the size in bytes of the memory
970 /// reference, which determines the scale.
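/// For example (illustrative): with BW = 7 and Size = 8 (as used for 64-bit
/// LDP/STP), signed offsets that are multiples of 8 in [-512, 504] are
/// accepted, and an offset of 16 yields OffImm = 2.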
971 bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
972                                                         unsigned BW, unsigned Size,
973                                                         SDValue &Base,
974                                                         SDValue &OffImm) {
975   SDLoc dl(N);
976   const DataLayout &DL = CurDAG->getDataLayout();
977   const TargetLowering *TLI = getTargetLowering();
978   if (N.getOpcode() == ISD::FrameIndex) {
979     int FI = cast<FrameIndexSDNode>(N)->getIndex();
980     Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
981     OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
982     return true;
983   }
984 
985   // As opposed to the (12-bit) indexed addressing mode below, the 7/9-bit signed
986   // mode selected here doesn't support labels/immediates, only base+offset.
987   if (CurDAG->isBaseWithConstantOffset(N)) {
988     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
989       if (IsSignedImm) {
990         int64_t RHSC = RHS->getSExtValue();
991         unsigned Scale = Log2_32(Size);
992         int64_t Range = 0x1LL << (BW - 1);
993 
994         if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
995             RHSC < (Range << Scale)) {
996           Base = N.getOperand(0);
997           if (Base.getOpcode() == ISD::FrameIndex) {
998             int FI = cast<FrameIndexSDNode>(Base)->getIndex();
999             Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1000           }
1001           OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1002           return true;
1003         }
1004       } else {
1005         // unsigned Immediate
1006         uint64_t RHSC = RHS->getZExtValue();
1007         unsigned Scale = Log2_32(Size);
1008         uint64_t Range = 0x1ULL << BW;
1009 
1010         if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
1011           Base = N.getOperand(0);
1012           if (Base.getOpcode() == ISD::FrameIndex) {
1013             int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1014             Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1015           }
1016           OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1017           return true;
1018         }
1019       }
1020     }
1021   }
1022   // Base only. The address will be materialized into a register before
1023   // the memory is accessed.
1024   //    add x0, Xbase, #offset
1025   //    stp x1, x2, [x0]
1026   Base = N;
1027   OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1028   return true;
1029 }
1030 
1031 /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
1032 /// immediate" address.  The "Size" argument is the size in bytes of the memory
1033 /// reference, which determines the scale.
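/// For example (illustrative): with Size = 8, offsets that are multiples of 8
/// in [0, 32760] are accepted, so (add x0, #32) selects Base = x0 and
/// OffImm = 4, matching 'ldr x1, [x0, #32]'.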
1034 bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
1035                                               SDValue &Base, SDValue &OffImm) {
1036   SDLoc dl(N);
1037   const DataLayout &DL = CurDAG->getDataLayout();
1038   const TargetLowering *TLI = getTargetLowering();
1039   if (N.getOpcode() == ISD::FrameIndex) {
1040     int FI = cast<FrameIndexSDNode>(N)->getIndex();
1041     Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1042     OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1043     return true;
1044   }
1045 
1046   if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
1047     GlobalAddressSDNode *GAN =
1048         dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
1049     Base = N.getOperand(0);
1050     OffImm = N.getOperand(1);
1051     if (!GAN)
1052       return true;
1053 
1054     if (GAN->getOffset() % Size == 0 &&
1055         GAN->getGlobal()->getPointerAlignment(DL) >= Size)
1056       return true;
1057   }
1058 
1059   if (CurDAG->isBaseWithConstantOffset(N)) {
1060     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1061       int64_t RHSC = (int64_t)RHS->getZExtValue();
1062       unsigned Scale = Log2_32(Size);
1063       if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
1064         Base = N.getOperand(0);
1065         if (Base.getOpcode() == ISD::FrameIndex) {
1066           int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1067           Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1068         }
1069         OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1070         return true;
1071       }
1072     }
1073   }
1074 
1075   // Before falling back to our general case, check if the unscaled
1076   // instructions can handle this. If so, that's preferable.
1077   if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
1078     return false;
1079 
1080   // Base only. The address will be materialized into a register before
1081   // the memory is accessed.
1082   //    add x0, Xbase, #offset
1083   //    ldr x0, [x0]
1084   Base = N;
1085   OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1086   return true;
1087 }
1088 
1089 /// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
1090 /// immediate" address.  This should only match when there is an offset that
1091 /// is not valid for a scaled immediate addressing mode.  The "Size" argument
1092 /// is the size in bytes of the memory reference, which is needed here to know
1093 /// what is valid for a scaled immediate.
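/// For example (illustrative): with Size = 8, an offset of -16 is not valid
/// for the scaled form (which requires a non-negative offset) but fits the
/// unscaled 9-bit range, matching 'ldur x1, [x0, #-16]'.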
1094 bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
1095                                                  SDValue &Base,
1096                                                  SDValue &OffImm) {
1097   if (!CurDAG->isBaseWithConstantOffset(N))
1098     return false;
1099   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1100     int64_t RHSC = RHS->getSExtValue();
1101     // If the offset is valid as a scaled immediate, don't match here.
1102     if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
1103         RHSC < (0x1000 << Log2_32(Size)))
1104       return false;
1105     if (RHSC >= -256 && RHSC < 256) {
1106       Base = N.getOperand(0);
1107       if (Base.getOpcode() == ISD::FrameIndex) {
1108         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1109         const TargetLowering *TLI = getTargetLowering();
1110         Base = CurDAG->getTargetFrameIndex(
1111             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1112       }
1113       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
1114       return true;
1115     }
1116   }
1117   return false;
1118 }
1119 
1120 static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
1121   SDLoc dl(N);
1122   SDValue ImpDef = SDValue(
1123       CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
1124   return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef,
1125                                        N);
1126 }
1127 
1128 /// Check if the given SHL node (\p N) can be used to form an
1129 /// extended register for an addressing mode.
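/// For example (illustrative): with Size = 4, N = (shl (sext_inreg x2, i32), #2)
/// yields Offset = w2 with SignExtend set, matching 'ldr w0, [x1, w2, sxtw #2]'.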
1130 bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
1131                                             bool WantExtend, SDValue &Offset,
1132                                             SDValue &SignExtend) {
1133   assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
1134   ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1135   if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
1136     return false;
1137 
1138   SDLoc dl(N);
1139   if (WantExtend) {
1140     AArch64_AM::ShiftExtendType Ext =
1141         getExtendTypeForNode(N.getOperand(0), true);
1142     if (Ext == AArch64_AM::InvalidShiftExtend)
1143       return false;
1144 
1145     Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
1146     SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1147                                            MVT::i32);
1148   } else {
1149     Offset = N.getOperand(0);
1150     SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
1151   }
1152 
1153   unsigned LegalShiftVal = Log2_32(Size);
1154   unsigned ShiftVal = CSD->getZExtValue();
1155 
1156   if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1157     return false;
1158 
1159   return isWorthFolding(N);
1160 }
1161 
1162 bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
1163                                             SDValue &Base, SDValue &Offset,
1164                                             SDValue &SignExtend,
1165                                             SDValue &DoShift) {
1166   if (N.getOpcode() != ISD::ADD)
1167     return false;
1168   SDValue LHS = N.getOperand(0);
1169   SDValue RHS = N.getOperand(1);
1170   SDLoc dl(N);
1171 
1172   // We don't want to match immediate adds here, because they are better lowered
1173   // to the register-immediate addressing modes.
1174   if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
1175     return false;
1176 
1177   // Check if this particular node is reused in any non-memory related
1178   // operation.  If yes, do not try to fold this node into the address
1179   // computation, since the computation will be kept.
1180   const SDNode *Node = N.getNode();
1181   for (SDNode *UI : Node->uses()) {
1182     if (!isa<MemSDNode>(*UI))
1183       return false;
1184   }
1185 
1186   // Remember if it is worth folding N when it produces extended register.
1187   bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
1188 
1189   // Try to match a shifted extend on the RHS.
1190   if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1191       SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
1192     Base = LHS;
1193     DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1194     return true;
1195   }
1196 
1197   // Try to match a shifted extend on the LHS.
1198   if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1199       SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
1200     Base = RHS;
1201     DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1202     return true;
1203   }
1204 
1205   // There was no shift, whatever else we find.
1206   DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
1207 
1208   AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
1209   // Try to match an unshifted extend on the LHS.
1210   if (IsExtendedRegisterWorthFolding &&
1211       (Ext = getExtendTypeForNode(LHS, true)) !=
1212           AArch64_AM::InvalidShiftExtend) {
1213     Base = RHS;
1214     Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
1215     SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1216                                            MVT::i32);
1217     if (isWorthFolding(LHS))
1218       return true;
1219   }
1220 
1221   // Try to match an unshifted extend on the RHS.
1222   if (IsExtendedRegisterWorthFolding &&
1223       (Ext = getExtendTypeForNode(RHS, true)) !=
1224           AArch64_AM::InvalidShiftExtend) {
1225     Base = LHS;
1226     Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
1227     SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1228                                            MVT::i32);
1229     if (isWorthFolding(RHS))
1230       return true;
1231   }
1232 
1233   return false;
1234 }
1235 
1236 // Check if the given immediate is preferred by ADD. If an immediate can be
1237 // encoded in an ADD, or if it can be encoded in an "ADD LSL #12" and cannot
1238 // be encoded by a single MOVZ, return true.
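// For example (illustrative): 0x123000 is preferred (it is encodable as an ADD
// with #0x123, LSL #12 but needs more than one MOVZ/MOVK), whereas 0xff0000 is
// not, since 'movz w0, #0xff, lsl #16' materializes it in a single instruction.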
1239 static bool isPreferredADD(int64_t ImmOff) {
1240   // Constant in [0x0, 0xfff] can be encoded in ADD.
1241   if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
1242     return true;
1243   // Check if it can be encoded in an "ADD LSL #12".
1244   if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
1245     // As a single MOVZ is faster than an "ADD with LSL #12", ignore such constants.
1246     return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
1247            (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
1248   return false;
1249 }
1250 
1251 bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
1252                                             SDValue &Base, SDValue &Offset,
1253                                             SDValue &SignExtend,
1254                                             SDValue &DoShift) {
1255   if (N.getOpcode() != ISD::ADD)
1256     return false;
1257   SDValue LHS = N.getOperand(0);
1258   SDValue RHS = N.getOperand(1);
1259   SDLoc DL(N);
1260 
1261   // Check if this particular node is reused in any non-memory related
1262   // operation.  If yes, do not try to fold this node into the address
1263   // computation, since the computation will be kept.
1264   const SDNode *Node = N.getNode();
1265   for (SDNode *UI : Node->uses()) {
1266     if (!isa<MemSDNode>(*UI))
1267       return false;
1268   }
1269 
1270   // Watch out if RHS is a wide immediate: it cannot be selected into the
1271   // [BaseReg+Imm] addressing mode and may not be encodable in an ADD/SUB.
1272   // Instead it will use the [BaseReg + 0] addressing mode and generate
1273   // instructions like:
1274   //     MOV  X0, WideImmediate
1275   //     ADD  X1, BaseReg, X0
1276   //     LDR  X2, [X1, 0]
1277   // For such situation, using [BaseReg, XReg] addressing mode can save one
1278   // ADD/SUB:
1279   //     MOV  X0, WideImmediate
1280   //     LDR  X2, [BaseReg, X0]
1281   if (isa<ConstantSDNode>(RHS)) {
1282     int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
1283     unsigned Scale = Log2_32(Size);
1284     // Skip immediates that can be selected in the load/store addressing mode.
1285     // Also skip immediates that can be encoded by a single ADD (SUB is also
1286     // checked by using -ImmOff).
1287     if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
1288         isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
1289       return false;
1290 
1291     SDValue Ops[] = { RHS };
1292     SDNode *MOVI =
1293         CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1294     SDValue MOVIV = SDValue(MOVI, 0);
1295     // This ADD of two X registers will be selected into [Reg+Reg] mode.
1296     N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1297   }
1298 
1299   // Remember if it is worth folding N when it produces extended register.
1300   bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
1301 
1302   // Try to match a shifted extend on the RHS.
1303   if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1304       SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1305     Base = LHS;
1306     DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1307     return true;
1308   }
1309 
1310   // Try to match a shifted extend on the LHS.
1311   if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1312       SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1313     Base = RHS;
1314     DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1315     return true;
1316   }
1317 
1318   // Match any non-shifted, non-extend, non-immediate add expression.
1319   Base = LHS;
1320   Offset = RHS;
1321   SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1322   DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1323   // Reg1 + Reg2 is free: no check needed.
1324   return true;
1325 }
1326 
1327 SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1328   static const unsigned RegClassIDs[] = {
1329       AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1330   static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1331                                      AArch64::dsub2, AArch64::dsub3};
1332 
1333   return createTuple(Regs, RegClassIDs, SubRegs);
1334 }
1335 
1336 SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1337   static const unsigned RegClassIDs[] = {
1338       AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1339   static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1340                                      AArch64::qsub2, AArch64::qsub3};
1341 
1342   return createTuple(Regs, RegClassIDs, SubRegs);
1343 }
1344 
1345 SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1346   static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1347                                          AArch64::ZPR3RegClassID,
1348                                          AArch64::ZPR4RegClassID};
1349   static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1350                                      AArch64::zsub2, AArch64::zsub3};
1351 
1352   return createTuple(Regs, RegClassIDs, SubRegs);
1353 }
1354 
1355 SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
1356   assert(Regs.size() == 2 || Regs.size() == 4);
1357 
1358   // The createTuple interface requires 3 RegClassIDs for each possible
1359   // tuple type even though we only have them for ZPR2 and ZPR4.
1360   static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
1361                                          AArch64::ZPR4Mul4RegClassID};
1362   static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1363                                      AArch64::zsub2, AArch64::zsub3};
1364   return createTuple(Regs, RegClassIDs, SubRegs);
1365 }
1366 
1367 SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1368                                          const unsigned RegClassIDs[],
1369                                          const unsigned SubRegs[]) {
  // There's no special register-class for a vector-list of 1 element: it's
  // just a vector.
1372   if (Regs.size() == 1)
1373     return Regs[0];
1374 
1375   assert(Regs.size() >= 2 && Regs.size() <= 4);
1376 
1377   SDLoc DL(Regs[0]);
1378 
1379   SmallVector<SDValue, 4> Ops;
1380 
1381   // First operand of REG_SEQUENCE is the desired RegClass.
1382   Ops.push_back(
1383       CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1384 
1385   // Then we get pairs of source & subregister-position for the components.
1386   for (unsigned i = 0; i < Regs.size(); ++i) {
1387     Ops.push_back(Regs[i]);
1388     Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1389   }
1390 
1391   SDNode *N =
1392       CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1393   return SDValue(N, 0);
1394 }
1395 
1396 void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1397                                       bool isExt) {
1398   SDLoc dl(N);
1399   EVT VT = N->getValueType(0);
1400 
1401   unsigned ExtOff = isExt;
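  // For the extended-table (TBX) form the intrinsic carries an extra leading
  // vector operand (passed through for out-of-range indices), so the table
  // vectors start one operand further in; ExtOff/Vec0Off account for that.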
1402 
1403   // Form a REG_SEQUENCE to force register allocation.
1404   unsigned Vec0Off = ExtOff + 1;
1405   SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
1406                                N->op_begin() + Vec0Off + NumVecs);
1407   SDValue RegSeq = createQTuple(Regs);
1408 
1409   SmallVector<SDValue, 6> Ops;
1410   if (isExt)
1411     Ops.push_back(N->getOperand(1));
1412   Ops.push_back(RegSeq);
1413   Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1414   ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1415 }
1416 
1417 bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1418   LoadSDNode *LD = cast<LoadSDNode>(N);
1419   if (LD->isUnindexed())
1420     return false;
1421   EVT VT = LD->getMemoryVT();
1422   EVT DstVT = N->getValueType(0);
1423   ISD::MemIndexedMode AM = LD->getAddressingMode();
1424   bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1425 
1426   // We're not doing validity checking here. That was done when checking
1427   // if we should mark the load as indexed or not. We're just selecting
1428   // the right instruction.
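
  // Illustrative example of one case handled below: a post-indexed, zero- or
  // any-extending i8 load whose result is used as i64 selects LDRBBpost
  // (producing an i32) followed by a SUBREG_TO_REG into sub_32 to widen the
  // result to i64.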
1429   unsigned Opcode = 0;
1430 
1431   ISD::LoadExtType ExtType = LD->getExtensionType();
1432   bool InsertTo64 = false;
1433   if (VT == MVT::i64)
1434     Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1435   else if (VT == MVT::i32) {
1436     if (ExtType == ISD::NON_EXTLOAD)
1437       Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1438     else if (ExtType == ISD::SEXTLOAD)
1439       Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1440     else {
1441       Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1442       InsertTo64 = true;
1443       // The result of the load is only i32. It's the subreg_to_reg that makes
1444       // it into an i64.
1445       DstVT = MVT::i32;
1446     }
1447   } else if (VT == MVT::i16) {
1448     if (ExtType == ISD::SEXTLOAD) {
1449       if (DstVT == MVT::i64)
1450         Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1451       else
1452         Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1453     } else {
1454       Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1455       InsertTo64 = DstVT == MVT::i64;
1456       // The result of the load is only i32. It's the subreg_to_reg that makes
1457       // it into an i64.
1458       DstVT = MVT::i32;
1459     }
1460   } else if (VT == MVT::i8) {
1461     if (ExtType == ISD::SEXTLOAD) {
1462       if (DstVT == MVT::i64)
1463         Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1464       else
1465         Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1466     } else {
1467       Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1468       InsertTo64 = DstVT == MVT::i64;
1469       // The result of the load is only i32. It's the subreg_to_reg that makes
1470       // it into an i64.
1471       DstVT = MVT::i32;
1472     }
1473   } else if (VT == MVT::f16) {
1474     Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1475   } else if (VT == MVT::bf16) {
1476     Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1477   } else if (VT == MVT::f32) {
1478     Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1479   } else if (VT == MVT::f64 || VT.is64BitVector()) {
1480     Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1481   } else if (VT.is128BitVector()) {
1482     Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1483   } else
1484     return false;
1485   SDValue Chain = LD->getChain();
1486   SDValue Base = LD->getBasePtr();
1487   ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1488   int OffsetVal = (int)OffsetOp->getZExtValue();
1489   SDLoc dl(N);
1490   SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1491   SDValue Ops[] = { Base, Offset, Chain };
1492   SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1493                                        MVT::Other, Ops);
1494 
1495   // Transfer memoperands.
1496   MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1497   CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});
1498 
1499   // Either way, we're replacing the node, so tell the caller that.
1500   SDValue LoadedVal = SDValue(Res, 1);
1501   if (InsertTo64) {
1502     SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1503     LoadedVal =
1504         SDValue(CurDAG->getMachineNode(
1505                     AArch64::SUBREG_TO_REG, dl, MVT::i64,
1506                     CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1507                     SubReg),
1508                 0);
1509   }
1510 
1511   ReplaceUses(SDValue(N, 0), LoadedVal);
1512   ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1513   ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1514   CurDAG->RemoveDeadNode(N);
1515   return true;
1516 }
1517 
1518 void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1519                                      unsigned SubRegIdx) {
1520   SDLoc dl(N);
1521   EVT VT = N->getValueType(0);
1522   SDValue Chain = N->getOperand(0);
1523 
  SDValue Ops[] = {N->getOperand(2), // Mem operand
1525                    Chain};
1526 
1527   const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1528 
1529   SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1530   SDValue SuperReg = SDValue(Ld, 0);
1531   for (unsigned i = 0; i < NumVecs; ++i)
1532     ReplaceUses(SDValue(N, i),
1533         CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1534 
1535   ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1536 
1537   // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1538   // because it's too simple to have needed special treatment during lowering.
1539   if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
1540     MachineMemOperand *MemOp = MemIntr->getMemOperand();
1541     CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1542   }
1543 
1544   CurDAG->RemoveDeadNode(N);
1545 }
1546 
1547 void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1548                                          unsigned Opc, unsigned SubRegIdx) {
1549   SDLoc dl(N);
1550   EVT VT = N->getValueType(0);
1551   SDValue Chain = N->getOperand(0);
1552 
1553   SDValue Ops[] = {N->getOperand(1), // Mem operand
1554                    N->getOperand(2), // Incremental
1555                    Chain};
1556 
1557   const EVT ResTys[] = {MVT::i64, // Type of the write back register
1558                         MVT::Untyped, MVT::Other};
1559 
1560   SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1561 
1562   // Update uses of write back register
1563   ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1564 
1565   // Update uses of vector list
1566   SDValue SuperReg = SDValue(Ld, 1);
1567   if (NumVecs == 1)
1568     ReplaceUses(SDValue(N, 0), SuperReg);
1569   else
1570     for (unsigned i = 0; i < NumVecs; ++i)
1571       ReplaceUses(SDValue(N, i),
1572           CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1573 
1574   // Update the chain
1575   ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1576   CurDAG->RemoveDeadNode(N);
1577 }
1578 
/// Optimize \p OldBase and \p OldOffset, selecting the best addressing mode.
/// Returns a tuple consisting of an Opcode, an SDValue representing the new
/// Base and an SDValue representing the new offset.
1582 std::tuple<unsigned, SDValue, SDValue>
1583 AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1584                                               unsigned Opc_ri,
1585                                               const SDValue &OldBase,
1586                                               const SDValue &OldOffset,
1587                                               unsigned Scale) {
1588   SDValue NewBase = OldBase;
1589   SDValue NewOffset = OldOffset;
1590   // Detect a possible Reg+Imm addressing mode.
1591   const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1592       N, OldBase, NewBase, NewOffset);
1593 
1594   // Detect a possible reg+reg addressing mode, but only if we haven't already
1595   // detected a Reg+Imm one.
1596   const bool IsRegReg =
1597       !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
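
  // Illustrative example: with Scale == 2 (word elements), a base of the form
  // (add x0, (shl x1, 2)) is matched as reg+reg, selecting Opc_rr with
  // NewBase = x0 and NewOffset = x1; otherwise the reg+imm form Opc_ri is
  // used, keeping the incoming offset (possibly 0) if no immediate matched.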
1598 
1599   // Select the instruction.
1600   return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1601 }
1602 
1603 enum class SelectTypeKind {
1604   Int1 = 0,
1605   Int = 1,
1606   FP = 2,
1607   AnyType = 3,
1608 };
1609 
/// This function selects an opcode from a list of opcodes, which are
/// expected to be the opcodes for { 8-bit, 16-bit, 32-bit, 64-bit }
/// element types, in that order.
1613 template <SelectTypeKind Kind>
1614 static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
1615   // Only match scalable vector VTs
1616   if (!VT.isScalableVector())
1617     return 0;
1618 
1619   EVT EltVT = VT.getVectorElementType();
1620   switch (Kind) {
1621   case SelectTypeKind::AnyType:
1622     break;
1623   case SelectTypeKind::Int:
1624     if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
1625         EltVT != MVT::i64)
1626       return 0;
1627     break;
1628   case SelectTypeKind::Int1:
1629     if (EltVT != MVT::i1)
1630       return 0;
1631     break;
1632   case SelectTypeKind::FP:
1633     if (EltVT != MVT::f16 && EltVT != MVT::f32 && EltVT != MVT::f64)
1634       return 0;
1635     break;
1636   }
1637 
1638   unsigned Offset;
1639   switch (VT.getVectorMinNumElements()) {
1640   case 16: // 8-bit
1641     Offset = 0;
1642     break;
1643   case 8: // 16-bit
1644     Offset = 1;
1645     break;
1646   case 4: // 32-bit
1647     Offset = 2;
1648     break;
1649   case 2: // 64-bit
1650     Offset = 3;
1651     break;
1652   default:
1653     return 0;
1654   }
1655 
1656   return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
1657 }
1658 
1659 // This function is almost identical to SelectWhilePair, but has an
1660 // extra check on the range of the immediate operand.
1661 // TODO: Merge these two functions together at some point?
1662 void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
1663   // Immediate can be either 0 or 1.
1664   if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2)))
1665     if (Imm->getZExtValue() > 1)
1666       return;
1667 
1668   SDLoc DL(N);
1669   EVT VT = N->getValueType(0);
1670   SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1671   SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1672   SDValue SuperReg = SDValue(WhilePair, 0);
1673 
1674   for (unsigned I = 0; I < 2; ++I)
1675     ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1676                                    AArch64::psub0 + I, DL, VT, SuperReg));
1677 
1678   CurDAG->RemoveDeadNode(N);
1679 }
1680 
1681 void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
1682   SDLoc DL(N);
1683   EVT VT = N->getValueType(0);
1684 
1685   SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1686 
1687   SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1688   SDValue SuperReg = SDValue(WhilePair, 0);
1689 
1690   for (unsigned I = 0; I < 2; ++I)
1691     ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1692                                    AArch64::psub0 + I, DL, VT, SuperReg));
1693 
1694   CurDAG->RemoveDeadNode(N);
1695 }
1696 
1697 void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
1698                                              unsigned Opcode) {
1699   EVT VT = N->getValueType(0);
1700   SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1701   SDValue Ops = createZTuple(Regs);
1702   SDLoc DL(N);
1703   SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
1704   SDValue SuperReg = SDValue(Intrinsic, 0);
1705   for (unsigned i = 0; i < NumVecs; ++i)
1706     ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1707                                    AArch64::zsub0 + i, DL, VT, SuperReg));
1708 
1709   CurDAG->RemoveDeadNode(N);
1710 }
1711 
1712 void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
1713                                                           unsigned NumVecs,
1714                                                           bool IsZmMulti,
1715                                                           unsigned Opcode,
1716                                                           bool HasPred) {
1717   assert(Opcode != 0 && "Unexpected opcode");
1718 
1719   SDLoc DL(N);
1720   EVT VT = N->getValueType(0);
1721   unsigned FirstVecIdx = HasPred ? 2 : 1;
1722 
1723   auto GetMultiVecOperand = [=](unsigned StartIdx) {
1724     SmallVector<SDValue, 4> Regs(N->op_begin() + StartIdx,
1725                                  N->op_begin() + StartIdx + NumVecs);
1726     return createZMulTuple(Regs);
1727   };
1728 
1729   SDValue Zdn = GetMultiVecOperand(FirstVecIdx);
1730 
1731   SDValue Zm;
1732   if (IsZmMulti)
1733     Zm = GetMultiVecOperand(NumVecs + FirstVecIdx);
1734   else
1735     Zm = N->getOperand(NumVecs + FirstVecIdx);
1736 
1737   SDNode *Intrinsic;
1738   if (HasPred)
1739     Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped,
1740                                        N->getOperand(1), Zdn, Zm);
1741   else
1742     Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm);
1743   SDValue SuperReg = SDValue(Intrinsic, 0);
1744   for (unsigned i = 0; i < NumVecs; ++i)
1745     ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1746                                    AArch64::zsub0 + i, DL, VT, SuperReg));
1747 
1748   CurDAG->RemoveDeadNode(N);
1749 }
1750 
1751 void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
1752                                                unsigned Scale, unsigned Opc_ri,
1753                                                unsigned Opc_rr, bool IsIntr) {
1754   assert(Scale < 4 && "Invalid scaling value.");
1755   SDLoc DL(N);
1756   EVT VT = N->getValueType(0);
1757   SDValue Chain = N->getOperand(0);
1758 
1759   // Optimize addressing mode.
1760   SDValue Base, Offset;
1761   unsigned Opc;
1762   std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
1763       N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
1764       CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
1765 
1766   SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
1767                    Base,                          // Memory operand
1768                    Offset, Chain};
1769 
1770   const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1771 
1772   SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
1773   SDValue SuperReg = SDValue(Load, 0);
1774   for (unsigned i = 0; i < NumVecs; ++i)
1775     ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1776                                    AArch64::zsub0 + i, DL, VT, SuperReg));
1777 
1778   // Copy chain
1779   unsigned ChainIdx = NumVecs;
1780   ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
1781   CurDAG->RemoveDeadNode(N);
1782 }
1783 
1784 void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
1785                                                           unsigned NumVecs,
1786                                                           unsigned Scale,
1787                                                           unsigned Opc_ri,
1788                                                           unsigned Opc_rr) {
1789   assert(Scale < 4 && "Invalid scaling value.");
1790   SDLoc DL(N);
1791   EVT VT = N->getValueType(0);
1792   SDValue Chain = N->getOperand(0);
1793 
1794   SDValue PNg = N->getOperand(2);
1795   SDValue Base = N->getOperand(3);
1796   SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
1797   unsigned Opc;
1798   std::tie(Opc, Base, Offset) =
1799       findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale);
1800 
1801   SDValue Ops[] = {PNg,            // Predicate-as-counter
1802                    Base,           // Memory operand
1803                    Offset, Chain};
1804 
1805   const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1806 
1807   SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
1808   SDValue SuperReg = SDValue(Load, 0);
1809   for (unsigned i = 0; i < NumVecs; ++i)
1810     ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1811                                    AArch64::zsub0 + i, DL, VT, SuperReg));
1812 
1813   // Copy chain
1814   unsigned ChainIdx = NumVecs;
1815   ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
1816   CurDAG->RemoveDeadNode(N);
1817 }
1818 
1819 void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
1820                                             unsigned Opcode) {
1821   if (N->getValueType(0) != MVT::nxv4f32)
1822     return;
1823   SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
1824 }
1825 
1826 void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
1827                                       unsigned Op) {
1828   SDLoc DL(N);
1829   EVT VT = N->getValueType(0);
1830 
1831   SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1832   SDValue Zd = createZMulTuple(Regs);
1833   SDValue Zn = N->getOperand(1 + NumVecs);
1834   SDValue Zm = N->getOperand(2 + NumVecs);
1835 
1836   SDValue Ops[] = {Zd, Zn, Zm};
1837 
1838   SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops);
1839   SDValue SuperReg = SDValue(Intrinsic, 0);
1840   for (unsigned i = 0; i < NumVecs; ++i)
1841     ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1842                                    AArch64::zsub0 + i, DL, VT, SuperReg));
1843 
1844   CurDAG->RemoveDeadNode(N);
1845 }
1846 
1847 bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
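  // Fold the tile number into the base register (e.g. ZAS0 plus a tile
  // index), rejecting tile numbers that are out of range for the given tile
  // size.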
1848   switch (BaseReg) {
1849   default:
1850     return false;
1851   case AArch64::ZA:
1852   case AArch64::ZAB0:
1853     if (TileNum == 0)
1854       break;
1855     return false;
1856   case AArch64::ZAH0:
1857     if (TileNum <= 1)
1858       break;
1859     return false;
1860   case AArch64::ZAS0:
1861     if (TileNum <= 3)
1862       break;
1863     return false;
1864   case AArch64::ZAD0:
1865     if (TileNum <= 7)
1866       break;
1867     return false;
1868   }
1869 
1870   BaseReg += TileNum;
1871   return true;
1872 }
1873 
1874 template <unsigned MaxIdx, unsigned Scale>
1875 void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
1876                                                 unsigned BaseReg, unsigned Op) {
1877   unsigned TileNum = 0;
1878   if (BaseReg != AArch64::ZA)
1879     TileNum = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
1880 
1881   if (!SelectSMETile(BaseReg, TileNum))
1882     return;
1883 
1884   SDValue SliceBase, Base, Offset;
1885   if (BaseReg == AArch64::ZA)
1886     SliceBase = N->getOperand(2);
1887   else
1888     SliceBase = N->getOperand(3);
1889 
1890   if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
1891     return;
1892 
1893   SDLoc DL(N);
1894   SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other);
1895   SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)};
1896   SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
1897 
1898   EVT VT = N->getValueType(0);
1899   for (unsigned I = 0; I < NumVecs; ++I)
1900     ReplaceUses(SDValue(N, I),
1901                 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
1902                                                SDValue(Mov, 0)));
1903   // Copy chain
1904   unsigned ChainIdx = NumVecs;
1905   ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
1906   CurDAG->RemoveDeadNode(N);
1907 }
1908 
1909 void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
1910                                                     unsigned NumOutVecs,
1911                                                     bool IsTupleInput,
1912                                                     unsigned Opc) {
1913   SDLoc DL(N);
1914   EVT VT = N->getValueType(0);
1915   unsigned NumInVecs = N->getNumOperands() - 1;
1916 
1917   SmallVector<SDValue, 6> Ops;
1918   if (IsTupleInput) {
1919     assert((NumInVecs == 2 || NumInVecs == 4) &&
1920            "Don't know how to handle multi-register input!");
1921     SmallVector<SDValue, 4> Regs(N->op_begin() + 1,
1922                                  N->op_begin() + 1 + NumInVecs);
1923     Ops.push_back(createZMulTuple(Regs));
1924   } else {
1925     // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
1926     for (unsigned I = 0; I < NumInVecs; I++)
1927       Ops.push_back(N->getOperand(1 + I));
1928   }
1929 
1930   SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1931   SDValue SuperReg = SDValue(Res, 0);
1932 
1933   for (unsigned I = 0; I < NumOutVecs; I++)
1934     ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1935                                    AArch64::zsub0 + I, DL, VT, SuperReg));
1936   CurDAG->RemoveDeadNode(N);
1937 }
1938 
1939 void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
1940                                       unsigned Opc) {
1941   SDLoc dl(N);
1942   EVT VT = N->getOperand(2)->getValueType(0);
1943 
1944   // Form a REG_SEQUENCE to force register allocation.
1945   bool Is128Bit = VT.getSizeInBits() == 128;
1946   SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1947   SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
1948 
1949   SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
1950   SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
1951 
1952   // Transfer memoperands.
1953   MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1954   CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
1955 
1956   ReplaceNode(N, St);
1957 }
1958 
1959 void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
1960                                                 unsigned Scale, unsigned Opc_rr,
1961                                                 unsigned Opc_ri) {
1962   SDLoc dl(N);
1963 
1964   // Form a REG_SEQUENCE to force register allocation.
1965   SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1966   SDValue RegSeq = createZTuple(Regs);
1967 
1968   // Optimize addressing mode.
1969   unsigned Opc;
1970   SDValue Offset, Base;
1971   std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
1972       N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
1973       CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
1974 
1975   SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
1976                    Base,                               // address
1977                    Offset,                             // offset
1978                    N->getOperand(0)};                  // chain
1979   SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
1980 
1981   ReplaceNode(N, St);
1982 }
1983 
1984 bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
1985                                                       SDValue &OffImm) {
1986   SDLoc dl(N);
1987   const DataLayout &DL = CurDAG->getDataLayout();
1988   const TargetLowering *TLI = getTargetLowering();
1989 
1990   // Try to match it for the frame address
1991   if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
1992     int FI = FINode->getIndex();
1993     Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1994     OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1995     return true;
1996   }
1997 
1998   return false;
1999 }
2000 
2001 void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
2002                                           unsigned Opc) {
2003   SDLoc dl(N);
2004   EVT VT = N->getOperand(2)->getValueType(0);
2005   const EVT ResTys[] = {MVT::i64,    // Type of the write back register
2006                         MVT::Other}; // Type for the Chain
2007 
2008   // Form a REG_SEQUENCE to force register allocation.
2009   bool Is128Bit = VT.getSizeInBits() == 128;
2010   SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2011   SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2012 
2013   SDValue Ops[] = {RegSeq,
2014                    N->getOperand(NumVecs + 1), // base register
2015                    N->getOperand(NumVecs + 2), // Incremental
2016                    N->getOperand(0)};          // Chain
2017   SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2018 
2019   ReplaceNode(N, St);
2020 }
2021 
2022 namespace {
2023 /// WidenVector - Given a value in the V64 register class, produce the
2024 /// equivalent value in the V128 register class.
2025 class WidenVector {
2026   SelectionDAG &DAG;
2027 
2028 public:
2029   WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
2030 
2031   SDValue operator()(SDValue V64Reg) {
2032     EVT VT = V64Reg.getValueType();
2033     unsigned NarrowSize = VT.getVectorNumElements();
2034     MVT EltTy = VT.getVectorElementType().getSimpleVT();
2035     MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
2036     SDLoc DL(V64Reg);
2037 
2038     SDValue Undef =
2039         SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
2040     return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
2041   }
2042 };
2043 } // namespace
2044 
2045 /// NarrowVector - Given a value in the V128 register class, produce the
2046 /// equivalent value in the V64 register class.
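/// For example (illustrative), a v4i32 value held in a Q register yields the
/// v2i32 value in its low 64 bits (the dsub subregister).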
2047 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
2048   EVT VT = V128Reg.getValueType();
2049   unsigned WideSize = VT.getVectorNumElements();
2050   MVT EltTy = VT.getVectorElementType().getSimpleVT();
2051   MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
2052 
2053   return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
2054                                     V128Reg);
2055 }
2056 
2057 void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
2058                                          unsigned Opc) {
2059   SDLoc dl(N);
2060   EVT VT = N->getValueType(0);
2061   bool Narrow = VT.getSizeInBits() == 64;
2062 
2063   // Form a REG_SEQUENCE to force register allocation.
2064   SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2065 
2066   if (Narrow)
    transform(Regs, Regs.begin(), WidenVector(*CurDAG));
2069 
2070   SDValue RegSeq = createQTuple(Regs);
2071 
2072   const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2073 
2074   unsigned LaneNo =
2075       cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
2076 
2077   SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2078                    N->getOperand(NumVecs + 3), N->getOperand(0)};
2079   SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2080   SDValue SuperReg = SDValue(Ld, 0);
2081 
2082   EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2083   static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2084                                     AArch64::qsub2, AArch64::qsub3 };
2085   for (unsigned i = 0; i < NumVecs; ++i) {
2086     SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
2087     if (Narrow)
2088       NV = NarrowVector(NV, *CurDAG);
2089     ReplaceUses(SDValue(N, i), NV);
2090   }
2091 
2092   ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
2093   CurDAG->RemoveDeadNode(N);
2094 }
2095 
2096 void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
2097                                              unsigned Opc) {
2098   SDLoc dl(N);
2099   EVT VT = N->getValueType(0);
2100   bool Narrow = VT.getSizeInBits() == 64;
2101 
2102   // Form a REG_SEQUENCE to force register allocation.
2103   SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2104 
2105   if (Narrow)
    transform(Regs, Regs.begin(), WidenVector(*CurDAG));
2108 
2109   SDValue RegSeq = createQTuple(Regs);
2110 
2111   const EVT ResTys[] = {MVT::i64, // Type of the write back register
2112                         RegSeq->getValueType(0), MVT::Other};
2113 
2114   unsigned LaneNo =
2115       cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
2116 
2117   SDValue Ops[] = {RegSeq,
2118                    CurDAG->getTargetConstant(LaneNo, dl,
2119                                              MVT::i64),         // Lane Number
2120                    N->getOperand(NumVecs + 2),                  // Base register
2121                    N->getOperand(NumVecs + 3),                  // Incremental
2122                    N->getOperand(0)};
2123   SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2124 
2125   // Update uses of the write back register
2126   ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
2127 
2128   // Update uses of the vector list
2129   SDValue SuperReg = SDValue(Ld, 1);
2130   if (NumVecs == 1) {
2131     ReplaceUses(SDValue(N, 0),
2132                 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
2133   } else {
2134     EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2135     static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2136                                       AArch64::qsub2, AArch64::qsub3 };
2137     for (unsigned i = 0; i < NumVecs; ++i) {
2138       SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
2139                                                   SuperReg);
2140       if (Narrow)
2141         NV = NarrowVector(NV, *CurDAG);
2142       ReplaceUses(SDValue(N, i), NV);
2143     }
2144   }
2145 
2146   // Update the Chain
2147   ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
2148   CurDAG->RemoveDeadNode(N);
2149 }
2150 
2151 void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
2152                                           unsigned Opc) {
2153   SDLoc dl(N);
2154   EVT VT = N->getOperand(2)->getValueType(0);
2155   bool Narrow = VT.getSizeInBits() == 64;
2156 
2157   // Form a REG_SEQUENCE to force register allocation.
2158   SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2159 
2160   if (Narrow)
    transform(Regs, Regs.begin(), WidenVector(*CurDAG));
2163 
2164   SDValue RegSeq = createQTuple(Regs);
2165 
2166   unsigned LaneNo =
2167       cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
2168 
2169   SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2170                    N->getOperand(NumVecs + 3), N->getOperand(0)};
2171   SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
2172 
2173   // Transfer memoperands.
2174   MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2175   CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2176 
2177   ReplaceNode(N, St);
2178 }
2179 
2180 void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
2181                                               unsigned Opc) {
2182   SDLoc dl(N);
2183   EVT VT = N->getOperand(2)->getValueType(0);
2184   bool Narrow = VT.getSizeInBits() == 64;
2185 
2186   // Form a REG_SEQUENCE to force register allocation.
2187   SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2188 
2189   if (Narrow)
    transform(Regs, Regs.begin(), WidenVector(*CurDAG));
2192 
2193   SDValue RegSeq = createQTuple(Regs);
2194 
2195   const EVT ResTys[] = {MVT::i64, // Type of the write back register
2196                         MVT::Other};
2197 
2198   unsigned LaneNo =
2199       cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
2200 
2201   SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2202                    N->getOperand(NumVecs + 2), // Base Register
2203                    N->getOperand(NumVecs + 3), // Incremental
2204                    N->getOperand(0)};
2205   SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2206 
2207   // Transfer memoperands.
2208   MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2209   CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2210 
2211   ReplaceNode(N, St);
2212 }
2213 
2214 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
2215                                        unsigned &Opc, SDValue &Opd0,
2216                                        unsigned &LSB, unsigned &MSB,
2217                                        unsigned NumberOfIgnoredLowBits,
2218                                        bool BiggerPattern) {
2219   assert(N->getOpcode() == ISD::AND &&
2220          "N must be a AND operation to call this function");
2221 
2222   EVT VT = N->getValueType(0);
2223 
  // We could test the type of VT and return false when it does not match, but
  // since that check has already been done before reaching this point in the
  // current context, we turn it into an assert here to avoid redundant code.
2227   assert((VT == MVT::i32 || VT == MVT::i64) &&
2228          "Type checking must have been done before calling this function");
2229 
2230   // FIXME: simplify-demanded-bits in DAGCombine will probably have
2231   // changed the AND node to a 32-bit mask operation. We'll have to
2232   // undo that as part of the transform here if we want to catch all
2233   // the opportunities.
2234   // Currently the NumberOfIgnoredLowBits argument helps to recover
2235   // from these situations when matching bigger pattern (bitfield insert).
2236 
2237   // For unsigned extracts, check for a shift right and mask
2238   uint64_t AndImm = 0;
2239   if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
2240     return false;
2241 
2242   const SDNode *Op0 = N->getOperand(0).getNode();
2243 
2244   // Because of simplify-demanded-bits in DAGCombine, the mask may have been
2245   // simplified. Try to undo that
2246   AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
2247 
2248   // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2249   if (AndImm & (AndImm + 1))
2250     return false;
2251 
2252   bool ClampMSB = false;
2253   uint64_t SrlImm = 0;
2254   // Handle the SRL + ANY_EXTEND case.
2255   if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
2256       isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
2257     // Extend the incoming operand of the SRL to 64-bit.
2258     Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
2259     // Make sure to clamp the MSB so that we preserve the semantics of the
2260     // original operations.
2261     ClampMSB = true;
2262   } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
2263              isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
2264                                    SrlImm)) {
2265     // If the shift result was truncated, we can still combine them.
2266     Opd0 = Op0->getOperand(0).getOperand(0);
2267 
2268     // Use the type of SRL node.
2269     VT = Opd0->getValueType(0);
2270   } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
2271     Opd0 = Op0->getOperand(0);
2272     ClampMSB = (VT == MVT::i32);
2273   } else if (BiggerPattern) {
2274     // Let's pretend a 0 shift right has been performed.
2275     // The resulting code will be at least as good as the original one
2276     // plus it may expose more opportunities for bitfield insert pattern.
2277     // FIXME: Currently we limit this to the bigger pattern, because
2278     // some optimizations expect AND and not UBFM.
2279     Opd0 = N->getOperand(0);
2280   } else
2281     return false;
2282 
2283   // Bail out on large immediates. This happens when no proper
2284   // combining/constant folding was performed.
  if (!BiggerPattern && (SrlImm == 0 || SrlImm >= VT.getSizeInBits())) {
2286     LLVM_DEBUG(
2287         (dbgs() << N
2288                 << ": Found large shift immediate, this should not happen\n"));
2289     return false;
2290   }
2291 
2292   LSB = SrlImm;
2293   MSB = SrlImm +
2294         (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
2295                         : llvm::countr_one<uint64_t>(AndImm)) -
2296         1;
2297   if (ClampMSB)
2298     // Since we're moving the extend before the right shift operation, we need
2299     // to clamp the MSB to make sure we don't shift in undefined bits instead of
2300     // the zeros which would get shifted in with the original right shift
2301     // operation.
2302     MSB = MSB > 31 ? 31 : MSB;
2303 
2304   Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2305   return true;
2306 }
2307 
2308 static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
2309                                              SDValue &Opd0, unsigned &Immr,
2310                                              unsigned &Imms) {
2311   assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
2312 
2313   EVT VT = N->getValueType(0);
2314   unsigned BitWidth = VT.getSizeInBits();
2315   assert((VT == MVT::i32 || VT == MVT::i64) &&
2316          "Type checking must have been done before calling this function");
2317 
2318   SDValue Op = N->getOperand(0);
2319   if (Op->getOpcode() == ISD::TRUNCATE) {
2320     Op = Op->getOperand(0);
2321     VT = Op->getValueType(0);
2322     BitWidth = VT.getSizeInBits();
2323   }
2324 
2325   uint64_t ShiftImm;
2326   if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
2327       !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2328     return false;
2329 
2330   unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2331   if (ShiftImm + Width > BitWidth)
2332     return false;
2333 
2334   Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
2335   Opd0 = Op.getOperand(0);
2336   Immr = ShiftImm;
2337   Imms = ShiftImm + Width - 1;
2338   return true;
2339 }
2340 
2341 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
2342                                           SDValue &Opd0, unsigned &LSB,
2343                                           unsigned &MSB) {
  // We are looking for the following pattern, which basically extracts several
  // contiguous bits from the source value and places them at the LSB of the
  // destination value; all other bits of the destination value are set to
  // zero:
2347   //
2348   // Value2 = AND Value, MaskImm
2349   // SRL Value2, ShiftImm
2350   //
  // where MaskImm >> ShiftImm must be a mask of contiguous low bits whose
  // width gives the number of extracted bits.
2352   //
2353   // This gets selected into a single UBFM:
2354   //
2355   // UBFM Value, ShiftImm, Log2_64(MaskImm)
2356   //
2357 
2358   if (N->getOpcode() != ISD::SRL)
2359     return false;
2360 
2361   uint64_t AndMask = 0;
2362   if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
2363     return false;
2364 
2365   Opd0 = N->getOperand(0).getOperand(0);
2366 
2367   uint64_t SrlImm = 0;
2368   if (!isIntImmediate(N->getOperand(1), SrlImm))
2369     return false;
2370 
2371   // Check whether we really have several bits extract here.
2372   if (!isMask_64(AndMask >> SrlImm))
2373     return false;
2374 
2375   Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2376   LSB = SrlImm;
2377   MSB = llvm::Log2_64(AndMask);
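  // Concrete example (illustrative): (srl (and x, 0xff0), 4) gives
  // AndMask = 0xff0 and SrlImm = 4; 0xff0 >> 4 == 0xff is a mask, so
  // LSB = 4 and MSB = Log2_64(0xff0) = 11, i.e. UBFM x, #4, #11.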
2378   return true;
2379 }
2380 
2381 static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
2382                                        unsigned &Immr, unsigned &Imms,
2383                                        bool BiggerPattern) {
2384   assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
2385          "N must be a SHR/SRA operation to call this function");
2386 
2387   EVT VT = N->getValueType(0);
2388 
  // We could test the type of VT and return false when it does not match, but
  // since that check has already been done before reaching this point in the
  // current context, we turn it into an assert here to avoid redundant code.
2392   assert((VT == MVT::i32 || VT == MVT::i64) &&
2393          "Type checking must have been done before calling this function");
2394 
2395   // Check for AND + SRL doing several bits extract.
2396   if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
2397     return true;
2398 
2399   // We're looking for a shift of a shift.
2400   uint64_t ShlImm = 0;
2401   uint64_t TruncBits = 0;
2402   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
2403     Opd0 = N->getOperand(0).getOperand(0);
2404   } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
2405              N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
    // We are looking for a shift of a truncate. A truncate from i64 to i32
    // can be considered as setting the high 32 bits to zero. Our strategy
    // here is to always generate a 64-bit UBFM; this consistency helps the
    // later CSE pass find more redundancy.
2410     Opd0 = N->getOperand(0).getOperand(0);
2411     TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
2412     VT = Opd0.getValueType();
2413     assert(VT == MVT::i64 && "the promoted type should be i64");
2414   } else if (BiggerPattern) {
2415     // Let's pretend a 0 shift left has been performed.
2416     // FIXME: Currently we limit this to the bigger pattern case,
2417     // because some optimizations expect AND and not UBFM
2418     Opd0 = N->getOperand(0);
2419   } else
2420     return false;
2421 
2422   // Missing combines/constant folding may have left us with strange
2423   // constants.
2424   if (ShlImm >= VT.getSizeInBits()) {
2425     LLVM_DEBUG(
2426         (dbgs() << N
2427                 << ": Found large shift immediate, this should not happen\n"));
2428     return false;
2429   }
2430 
2431   uint64_t SrlImm = 0;
2432   if (!isIntImmediate(N->getOperand(1), SrlImm))
2433     return false;
2434 
2435   assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
2436          "bad amount in shift node!");
2437   int immr = SrlImm - ShlImm;
2438   Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
2439   Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
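  // Worked example (illustrative): for i32, (srl (shl x, 24), 27) gives
  // ShlImm = 24 and SrlImm = 27, so Immr = 3 and Imms = 32 - 24 - 1 = 7,
  // i.e. UBFMWri x, #3, #7 (an unsigned extract of bits 3..7).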
2440   // SRA requires a signed extraction
2441   if (VT == MVT::i32)
2442     Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
2443   else
2444     Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
2445   return true;
2446 }
2447 
2448 bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
2449   assert(N->getOpcode() == ISD::SIGN_EXTEND);
2450 
2451   EVT VT = N->getValueType(0);
2452   EVT NarrowVT = N->getOperand(0)->getValueType(0);
2453   if (VT != MVT::i64 || NarrowVT != MVT::i32)
2454     return false;
2455 
2456   uint64_t ShiftImm;
2457   SDValue Op = N->getOperand(0);
2458   if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2459     return false;
2460 
2461   SDLoc dl(N);
2462   // Extend the incoming operand of the shift to 64-bits.
2463   SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
2464   unsigned Immr = ShiftImm;
2465   unsigned Imms = NarrowVT.getSizeInBits() - 1;
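  // Worked example (illustrative): (sext_i64 (sra_i32 x, 5)) yields Immr = 5
  // and Imms = 31, i.e. SBFMXri on the widened operand: a signed extract of
  // bits 5..31, sign-extended to 64 bits.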
2466   SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2467                    CurDAG->getTargetConstant(Imms, dl, VT)};
2468   CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
2469   return true;
2470 }
2471 
2472 static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
2473                                 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2474                                 unsigned NumberOfIgnoredLowBits = 0,
2475                                 bool BiggerPattern = false) {
2476   if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
2477     return false;
2478 
2479   switch (N->getOpcode()) {
2480   default:
2481     if (!N->isMachineOpcode())
2482       return false;
2483     break;
2484   case ISD::AND:
2485     return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
2486                                       NumberOfIgnoredLowBits, BiggerPattern);
2487   case ISD::SRL:
2488   case ISD::SRA:
2489     return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
2490 
2491   case ISD::SIGN_EXTEND_INREG:
2492     return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
2493   }
2494 
2495   unsigned NOpc = N->getMachineOpcode();
2496   switch (NOpc) {
2497   default:
2498     return false;
2499   case AArch64::SBFMWri:
2500   case AArch64::UBFMWri:
2501   case AArch64::SBFMXri:
2502   case AArch64::UBFMXri:
2503     Opc = NOpc;
2504     Opd0 = N->getOperand(0);
2505     Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
2506     Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
2507     return true;
2508   }
2509   // Unreachable
2510   return false;
2511 }
2512 
2513 bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
2514   unsigned Opc, Immr, Imms;
2515   SDValue Opd0;
2516   if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
2517     return false;
2518 
2519   EVT VT = N->getValueType(0);
2520   SDLoc dl(N);
2521 
2522   // If the bit extract operation is 64bit but the original type is 32bit, we
2523   // need to add one EXTRACT_SUBREG.
2524   if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
2525     SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
2526                        CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
2527 
2528     SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
2529     SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl,
2530                                                    MVT::i32, SDValue(BFM, 0));
2531     ReplaceNode(N, Inner.getNode());
2532     return true;
2533   }
2534 
2535   SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2536                    CurDAG->getTargetConstant(Imms, dl, VT)};
2537   CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2538   return true;
2539 }
2540 
2541 /// Does DstMask form a complementary pair with the mask provided by
2542 /// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
2543 /// this asks whether DstMask zeroes precisely those bits that will be set by
2544 /// the other half.
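/// For example (illustrative), with VT == i32, DstMask == 0xffff0000 and the
/// inserted bits occupying 0x0000ffff, the two masks are disjoint and together
/// cover all 32 bits, so the pair is usable.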
2545 static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
2546                               unsigned NumberOfIgnoredHighBits, EVT VT) {
2547   assert((VT == MVT::i32 || VT == MVT::i64) &&
2548          "i32 or i64 mask type expected!");
2549   unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
2550 
2551   APInt SignificantDstMask = APInt(BitWidth, DstMask);
2552   APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
2553 
2554   return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
2555          (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
2556 }
2557 
2558 // Look for bits that will be useful for later uses.
// A bit is considered useless as soon as it is dropped and never used
// before it has been dropped.
// E.g., looking for the useful bits of x:
// 1. y = x & 0x7
// 2. z = y >> 2
// After #1, the useful bits of x are 0x7; these useful bits live through y.
// After #2, the useful bits of x are 0x4.
// However, if x is used by an unpredictable instruction, then all its bits
// are useful.
2569 // E.g.
2570 // 1. y = x & 0x7
2571 // 2. z = y >> 2
2572 // 3. str x, [@x]
2573 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
2574 
2575 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
2576                                               unsigned Depth) {
2577   uint64_t Imm =
2578       cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2579   Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
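  // E.g. (illustrative): an ANDWri whose decoded immediate is 0xff means only
  // the low 8 bits of the operand can affect this user's result.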
2580   UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
2581   getUsefulBits(Op, UsefulBits, Depth + 1);
2582 }
2583 
2584 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
2585                                              uint64_t Imm, uint64_t MSB,
2586                                              unsigned Depth) {
  // Inherit the bit width from UsefulBits.
2588   APInt OpUsefulBits(UsefulBits);
2589   OpUsefulBits = 1;
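
  // Worked example (illustrative): a UBFM with Imm (immr) = 8 and MSB (imms)
  // = 15 extracts bits 8..15, so if every bit of its result is useful, the
  // useful bits of the source operand are 0xff00.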
2590 
2591   if (MSB >= Imm) {
2592     OpUsefulBits <<= MSB - Imm + 1;
2593     --OpUsefulBits;
2594     // The interesting part will be in the lower part of the result
2595     getUsefulBits(Op, OpUsefulBits, Depth + 1);
2596     // The interesting part was starting at Imm in the argument
2597     OpUsefulBits <<= Imm;
2598   } else {
2599     OpUsefulBits <<= MSB + 1;
2600     --OpUsefulBits;
2601     // The interesting part will be shifted in the result
2602     OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
2603     getUsefulBits(Op, OpUsefulBits, Depth + 1);
2604     // The interesting part was at zero in the argument
2605     OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
2606   }
2607 
2608   UsefulBits &= OpUsefulBits;
2609 }
2610 
2611 static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
2612                                   unsigned Depth) {
2613   uint64_t Imm =
2614       cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2615   uint64_t MSB =
2616       cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2617 
2618   getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
2619 }
2620 
2621 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
2622                                               unsigned Depth) {
2623   uint64_t ShiftTypeAndValue =
2624       cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2625   APInt Mask(UsefulBits);
2626   Mask.clearAllBits();
2627   Mask.flipAllBits();
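
  // Map the useful bits of the ORR result back onto the shifted operand: for
  // an LSL the all-ones mask is shifted up before the recursive query and
  // back down afterwards (and vice versa for LSR). E.g. (illustrative): if
  // only bit 7 of the result is useful and the operand is shifted left by 4,
  // then only bit 3 of the unshifted operand is useful.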
2628 
2629   if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
2630     // Shift Left
2631     uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2632     Mask <<= ShiftAmt;
2633     getUsefulBits(Op, Mask, Depth + 1);
2634     Mask.lshrInPlace(ShiftAmt);
2635   } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
2636     // Shift Right
2637     // We do not handle AArch64_AM::ASR, because the sign will change the
2638     // number of useful bits
2639     uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2640     Mask.lshrInPlace(ShiftAmt);
2641     getUsefulBits(Op, Mask, Depth + 1);
2642     Mask <<= ShiftAmt;
2643   } else
2644     return;
2645 
2646   UsefulBits &= Mask;
2647 }
2648 
2649 static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
2650                                  unsigned Depth) {
2651   uint64_t Imm =
2652       cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2653   uint64_t MSB =
2654       cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
2655 
2656   APInt OpUsefulBits(UsefulBits);
2657   OpUsefulBits = 1;
2658 
2659   APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
2660   ResultUsefulBits.flipAllBits();
2661   APInt Mask(UsefulBits.getBitWidth(), 0);
2662 
2663   getUsefulBits(Op, ResultUsefulBits, Depth + 1);
2664 
2665   if (MSB >= Imm) {
2666     // The instruction is a BFXIL.
2667     uint64_t Width = MSB - Imm + 1;
2668     uint64_t LSB = Imm;
2669 
2670     OpUsefulBits <<= Width;
2671     --OpUsefulBits;
2672 
2673     if (Op.getOperand(1) == Orig) {
2674       // Copy the low bits from the result to bits starting from LSB.
2675       Mask = ResultUsefulBits & OpUsefulBits;
2676       Mask <<= LSB;
2677     }
2678 
2679     if (Op.getOperand(0) == Orig)
2680       // Bits starting from LSB in the input contribute to the result.
2681       Mask |= (ResultUsefulBits & ~OpUsefulBits);
2682   } else {
2683     // The instruction is a BFI.
2684     uint64_t Width = MSB + 1;
2685     uint64_t LSB = UsefulBits.getBitWidth() - Imm;
2686 
2687     OpUsefulBits <<= Width;
2688     --OpUsefulBits;
2689     OpUsefulBits <<= LSB;
2690 
2691     if (Op.getOperand(1) == Orig) {
2692       // Copy the bits from the result to the zero bits.
2693       Mask = ResultUsefulBits & OpUsefulBits;
2694       Mask.lshrInPlace(LSB);
2695     }
2696 
2697     if (Op.getOperand(0) == Orig)
2698       Mask |= (ResultUsefulBits & ~OpUsefulBits);
2699   }
2700 
2701   UsefulBits &= Mask;
2702 }
2703 
2704 static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
2705                                 SDValue Orig, unsigned Depth) {
2706 
2707   // Users of this node should have already been instruction selected
2708   // FIXME: Can we turn that into an assert?
2709   if (!UserNode->isMachineOpcode())
2710     return;
2711 
2712   switch (UserNode->getMachineOpcode()) {
2713   default:
2714     return;
2715   case AArch64::ANDSWri:
2716   case AArch64::ANDSXri:
2717   case AArch64::ANDWri:
2718   case AArch64::ANDXri:
    // We increment Depth only when we call getUsefulBits.
2720     return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
2721                                              Depth);
2722   case AArch64::UBFMWri:
2723   case AArch64::UBFMXri:
2724     return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
2725 
2726   case AArch64::ORRWrs:
2727   case AArch64::ORRXrs:
2728     if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
2729       getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
2730                                         Depth);
2731     return;
2732   case AArch64::BFMWri:
2733   case AArch64::BFMXri:
2734     return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
2735 
2736   case AArch64::STRBBui:
2737   case AArch64::STURBBi:
2738     if (UserNode->getOperand(0) != Orig)
2739       return;
2740     UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
2741     return;
2742 
2743   case AArch64::STRHHui:
2744   case AArch64::STURHHi:
2745     if (UserNode->getOperand(0) != Orig)
2746       return;
2747     UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
2748     return;
2749   }
2750 }
2751 
2752 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
2753   if (Depth >= SelectionDAG::MaxRecursionDepth)
2754     return;
2755   // Initialize UsefulBits
2756   if (!Depth) {
2757     unsigned Bitwidth = Op.getScalarValueSizeInBits();
    // At the beginning, assume every produced bit is useful.
2759     UsefulBits = APInt(Bitwidth, 0);
2760     UsefulBits.flipAllBits();
2761   }
2762   APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
2763 
2764   for (SDNode *Node : Op.getNode()->uses()) {
2765     // A use cannot produce useful bits
2766     APInt UsefulBitsForUse = APInt(UsefulBits);
2767     getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
2768     UsersUsefulBits |= UsefulBitsForUse;
2769   }
  // UsefulBits contains the produced bits that are meaningful for the
  // current definition; a user cannot make a bit meaningful at this point.
2773   UsefulBits &= UsersUsefulBits;
2774 }
2775 
2776 /// Create a machine node performing a notional SHL of Op by ShlAmount. If
2777 /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
2778 /// 0, return Op unchanged.
2779 static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
2780   if (ShlAmount == 0)
2781     return Op;
2782 
2783   EVT VT = Op.getValueType();
2784   SDLoc dl(Op);
2785   unsigned BitWidth = VT.getSizeInBits();
2786   unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2787 
2788   SDNode *ShiftNode;
2789   if (ShlAmount > 0) {
2790     // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
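    // For instance (32-bit): LSL wD, wN, #3 == UBFM wD, wN, #29, #28.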
2791     ShiftNode = CurDAG->getMachineNode(
2792         UBFMOpc, dl, VT, Op,
2793         CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
2794         CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
2795   } else {
2796     // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
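    // For instance (32-bit): LSR wD, wN, #3 == UBFM wD, wN, #3, #31.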
2797     assert(ShlAmount < 0 && "expected right shift");
2798     int ShrAmount = -ShlAmount;
2799     ShiftNode = CurDAG->getMachineNode(
2800         UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
2801         CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
2802   }
2803 
2804   return SDValue(ShiftNode, 0);
2805 }
2806 
2807 // For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
2808 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
2809                                            bool BiggerPattern,
2810                                            const uint64_t NonZeroBits,
2811                                            SDValue &Src, int &DstLSB,
2812                                            int &Width);
2813 
2814 // For bit-field-positioning pattern "shl VAL, N)".
2815 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
2816                                            bool BiggerPattern,
2817                                            const uint64_t NonZeroBits,
2818                                            SDValue &Src, int &DstLSB,
2819                                            int &Width);
2820 
2821 /// Does this tree qualify as an attempt to move a bitfield into position,
/// essentially "(and (shl VAL, N), Mask)" or "(shl VAL, N)".
2823 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
2824                                     bool BiggerPattern, SDValue &Src,
2825                                     int &DstLSB, int &Width) {
2826   EVT VT = Op.getValueType();
2827   unsigned BitWidth = VT.getSizeInBits();
2828   (void)BitWidth;
2829   assert(BitWidth == 32 || BitWidth == 64);
2830 
2831   KnownBits Known = CurDAG->computeKnownBits(Op);
2832 
2833   // Non-zero in the sense that they're not provably zero, which is the key
2834   // point if we want to use this value
2835   const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
2836   if (!isShiftedMask_64(NonZeroBits))
2837     return false;
2838 
2839   switch (Op.getOpcode()) {
2840   default:
2841     break;
2842   case ISD::AND:
2843     return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
2844                                           NonZeroBits, Src, DstLSB, Width);
2845   case ISD::SHL:
2846     return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
2847                                           NonZeroBits, Src, DstLSB, Width);
2848   }
2849 
2850   return false;
2851 }
2852 
2853 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
2854                                            bool BiggerPattern,
2855                                            const uint64_t NonZeroBits,
2856                                            SDValue &Src, int &DstLSB,
2857                                            int &Width) {
2858   assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
2859 
2860   EVT VT = Op.getValueType();
2861   assert((VT == MVT::i32 || VT == MVT::i64) &&
2862          "Caller guarantees VT is one of i32 or i64");
2863   (void)VT;
2864 
2865   uint64_t AndImm;
2866   if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm))
2867     return false;
2868 
  // If (~AndImm & NonZeroBits) is not zero at POS, we know that
  //   1) (AndImm & (1 << POS)) == 0, and
  //   2) the result of the AND is not zero at bit POS (according to
  //      NonZeroBits).
  //
  // 1) and 2) don't agree, so something must be wrong (e.g., in
  // 'SelectionDAG::computeKnownBits').
2875   assert((~AndImm & NonZeroBits) == 0 &&
2876          "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");
2877 
2878   SDValue AndOp0 = Op.getOperand(0);
2879 
2880   uint64_t ShlImm;
2881   SDValue ShlOp0;
2882   if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
2883     // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
2884     ShlOp0 = AndOp0.getOperand(0);
2885   } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
2886              isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL,
2887                                    ShlImm)) {
2888     // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
2889 
2890     // ShlVal == shl(val, N), which is a left shift on a smaller type.
2891     SDValue ShlVal = AndOp0.getOperand(0);
2892 
    // Since this is after type legalization and ShlVal is extended to
    // MVT::i64, expect ShlVal's value type to be MVT::i32.
    assert((ShlVal.getValueType() == MVT::i32) &&
           "Expect ShlVal to have type MVT::i32.");
2896 
2897     // Widens 'val' to MVT::i64 as the source of bit field positioning.
2898     ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
2899   } else
2900     return false;
2901 
2902   // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
2903   // then we'll end up generating AndOp0+UBFIZ instead of just keeping
2904   // AndOp0+AND.
2905   if (!BiggerPattern && !AndOp0.hasOneUse())
2906     return false;
2907 
2908   DstLSB = llvm::countr_zero(NonZeroBits);
2909   Width = llvm::countr_one(NonZeroBits >> DstLSB);
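  // For example, NonZeroBits == 0xff0 gives DstLSB == 4 and Width == 8.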
2910 
2911   // Bail out on large Width. This happens when no proper combining / constant
2912   // folding was performed.
2913   if (Width >= (int)VT.getSizeInBits()) {
    // If VT is i64, Width > 64 is nonsensical since NonZeroBits is uint64_t,
    // and Width == 64 indicates a missed dag-combine from "(and val, AllOnes)"
    // to "val".
    // If VT is i32, Width >= 32 means:
    // - For "(and (any_extend(shl val, N)), shifted-mask)", the `and` Op
    //   demands at least 'Width' bits (after dag-combiner). This, together
    //   with the `any_extend` Op (undefined higher bits), indicates a missed
    //   combination when lowering the 'and' IR instruction to a machine IR
    //   instruction.
2922     LLVM_DEBUG(
2923         dbgs()
2924         << "Found large Width in bit-field-positioning -- this indicates no "
2925            "proper combining / constant folding was performed\n");
2926     return false;
2927   }
2928 
2929   // BFI encompasses sufficiently many nodes that it's worth inserting an extra
2930   // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
2931   // amount.  BiggerPattern is true when this pattern is being matched for BFI,
2932   // BiggerPattern is false when this pattern is being matched for UBFIZ, in
2933   // which case it is not profitable to insert an extra shift.
2934   if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
2935     return false;
2936 
2937   Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
2938   return true;
2939 }
2940 
// For node (shl (and val, mask), N), returns true if the node is equivalent to
// UBFIZ.
2943 static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
2944                                               SDValue &Src, int &DstLSB,
2945                                               int &Width) {
  // The caller should have verified that Op is a left shift by a constant
  // amount; the asserts below check that.
2948   assert(Op.getOpcode() == ISD::SHL &&
2949          "Op.getNode() should be a SHL node to call this function");
2950   assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
2951          "Op.getNode() should shift ShlImm to call this function");
2952 
2953   uint64_t AndImm = 0;
2954   SDValue Op0 = Op.getOperand(0);
2955   if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
2956     return false;
2957 
2958   const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
2959   if (isMask_64(ShiftedAndImm)) {
    // The low (64 - ShlImm) bits of AndImm must form a mask (a contiguous run
    // of ones starting at bit 0); the top ShlImm bits of AndImm may be
    // arbitrary, since the corresponding bits of the AND result are shifted
    // out anyway.
    //
    // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
    // the AND result bits corresponding to x, y and z are shifted out, so it is
    // fine not to extract them.
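    //
    // As a concrete instance: (shl (and val, 0x1f), 3) gives Width == 5 and
    // DstLSB == 3, i.e. the node is equivalent to UBFIZ val, #3, #5.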
2967     Width = llvm::countr_one(ShiftedAndImm);
2968     DstLSB = ShlImm;
2969     Src = Op0.getOperand(0);
2970     return true;
2971   }
2972   return false;
2973 }
2974 
2975 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
2976                                            bool BiggerPattern,
2977                                            const uint64_t NonZeroBits,
2978                                            SDValue &Src, int &DstLSB,
2979                                            int &Width) {
2980   assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
2981 
2982   EVT VT = Op.getValueType();
2983   assert((VT == MVT::i32 || VT == MVT::i64) &&
2984          "Caller guarantees that type is i32 or i64");
2985   (void)VT;
2986 
2987   uint64_t ShlImm;
2988   if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
2989     return false;
2990 
2991   if (!BiggerPattern && !Op.hasOneUse())
2992     return false;
2993 
2994   if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
2995     return true;
2996 
2997   DstLSB = llvm::countr_zero(NonZeroBits);
2998   Width = llvm::countr_one(NonZeroBits >> DstLSB);
2999 
3000   if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3001     return false;
3002 
3003   Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB);
3004   return true;
3005 }
3006 
3007 static bool isShiftedMask(uint64_t Mask, EVT VT) {
3008   assert(VT == MVT::i32 || VT == MVT::i64);
3009   if (VT == MVT::i32)
3010     return isShiftedMask_32(Mask);
3011   return isShiftedMask_64(Mask);
3012 }
3013 
3014 // Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
3015 // inserted only sets known zero bits.
3016 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
  assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
3018 
3019   EVT VT = N->getValueType(0);
3020   if (VT != MVT::i32 && VT != MVT::i64)
3021     return false;
3022 
3023   unsigned BitWidth = VT.getSizeInBits();
3024 
3025   uint64_t OrImm;
3026   if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
3027     return false;
3028 
  // Skip this transformation if the OR immediate can be encoded as a logical
  // (ORR) immediate; in that case the transformation would merely trade an
  // AND+ORR for an ORR+BFI/BFXIL, which is most likely performance neutral.
3032   if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
3033     return false;
3034 
3035   uint64_t MaskImm;
3036   SDValue And = N->getOperand(0);
3037   // Must be a single use AND with an immediate operand.
3038   if (!And.hasOneUse() ||
3039       !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
3040     return false;
3041 
3042   // Compute the Known Zero for the AND as this allows us to catch more general
3043   // cases than just looking for AND with imm.
3044   KnownBits Known = CurDAG->computeKnownBits(And);
3045 
3046   // Non-zero in the sense that they're not provably zero, which is the key
3047   // point if we want to use this value.
3048   uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
3049 
3050   // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
3051   if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
3052     return false;
3053 
3054   // The bits being inserted must only set those bits that are known to be zero.
3055   if ((OrImm & NotKnownZero) != 0) {
3056     // FIXME:  It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
3057     // currently handle this case.
3058     return false;
3059   }
3060 
3061   // BFI/BFXIL dst, src, #lsb, #width.
3062   int LSB = llvm::countr_one(NotKnownZero);
3063   int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();
3064 
3065   // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
3066   unsigned ImmR = (BitWidth - LSB) % BitWidth;
3067   unsigned ImmS = Width - 1;
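  // For example (i32): LSB == 16 and Width == 8 give ImmR == 16 and ImmS == 7,
  // i.e. BFI Wd, Wn, #16, #8.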
3068 
  // If we're creating a BFI instruction, avoid cases where we need more
  // instructions to materialize the BFI constant as compared to the original
  // ORR. A BFXIL will use the same constant as the original ORR, so the code
  // should be no worse in this case.
3073   bool IsBFI = LSB != 0;
3074   uint64_t BFIImm = OrImm >> LSB;
3075   if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
    // We have a BFI instruction and we know the constant can't be materialized
    // with an ORR-immediate with the zero register.
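    // As a rough cost heuristic (assuming the usual MOVZ/MOVK expansion of
    // MOVi32imm/MOVi64imm), count the non-zero 16-bit chunks of each
    // constant; the BFI constant is only acceptable if it does not need more
    // chunks than the original OR constant.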
3078     unsigned OrChunks = 0, BFIChunks = 0;
3079     for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
3080       if (((OrImm >> Shift) & 0xFFFF) != 0)
3081         ++OrChunks;
3082       if (((BFIImm >> Shift) & 0xFFFF) != 0)
3083         ++BFIChunks;
3084     }
3085     if (BFIChunks > OrChunks)
3086       return false;
3087   }
3088 
3089   // Materialize the constant to be inserted.
3090   SDLoc DL(N);
3091   unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
3092   SDNode *MOVI = CurDAG->getMachineNode(
3093       MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
3094 
3095   // Create the BFI/BFXIL instruction.
3096   SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
3097                    CurDAG->getTargetConstant(ImmR, DL, VT),
3098                    CurDAG->getTargetConstant(ImmS, DL, VT)};
3099   unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3100   CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3101   return true;
3102 }
3103 
3104 static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG,
3105                                            SDValue &ShiftedOperand,
3106                                            uint64_t &EncodedShiftImm) {
3107   // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
3108   if (!Dst.hasOneUse())
3109     return false;
3110 
3111   EVT VT = Dst.getValueType();
3112   assert((VT == MVT::i32 || VT == MVT::i64) &&
3113          "Caller should guarantee that VT is one of i32 or i64");
3114   const unsigned SizeInBits = VT.getSizeInBits();
3115 
3116   SDLoc DL(Dst.getNode());
3117   uint64_t AndImm, ShlImm;
3118   if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) &&
3119       isShiftedMask_64(AndImm)) {
3120     // Avoid transforming 'DstOp0' if it has other uses than the AND node.
3121     SDValue DstOp0 = Dst.getOperand(0);
3122     if (!DstOp0.hasOneUse())
3123       return false;
3124 
    // An example to illustrate the transformation
    // From:
    //    lsr     x8, x1, #1
    //    and     x8, x8, #0x3f80
    //    bfxil   x8, x1, #0, #7
    // To:
    //    and     x8, x1, #0x7f
    //    ubfx    x9, x1, #8, #7
    //    orr     x8, x8, x9, lsl #7
    //
    // The number of instructions remains the same, but ORR is faster than BFXIL
    // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
    // the dependency chain is improved after the transformation.
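    //
    // In the example above, AndImm == 0x3f80 has 7 trailing zeros and a mask
    // width of 7, and SrlImm == 1, so the UBFM below becomes UBFX #8, #7 and
    // the ORR uses an LSL #7 shifted operand.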
3138     uint64_t SrlImm;
3139     if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
3140       uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm);
3141       if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
3142         unsigned MaskWidth =
3143             llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask);
3144         unsigned UBFMOpc =
3145             (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3146         SDNode *UBFMNode = CurDAG->getMachineNode(
3147             UBFMOpc, DL, VT, DstOp0.getOperand(0),
3148             CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL,
3149                                       VT),
3150             CurDAG->getTargetConstant(
3151                 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
3152         ShiftedOperand = SDValue(UBFMNode, 0);
3153         EncodedShiftImm = AArch64_AM::getShifterImm(
3154             AArch64_AM::LSL, NumTrailingZeroInShiftedMask);
3155         return true;
3156       }
3157     }
3158     return false;
3159   }
3160 
3161   if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
3162     ShiftedOperand = Dst.getOperand(0);
3163     EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm);
3164     return true;
3165   }
3166 
3167   uint64_t SrlImm;
3168   if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
3169     ShiftedOperand = Dst.getOperand(0);
3170     EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
3171     return true;
3172   }
3173   return false;
3174 }
3175 
3176 // Given an 'ISD::OR' node that is going to be selected as BFM, analyze
3177 // the operands and select it to AArch64::ORR with shifted registers if
3178 // that's more efficient. Returns true iff selection to AArch64::ORR happens.
3179 static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
3180                             SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
3181                             const bool BiggerPattern) {
3182   EVT VT = N->getValueType(0);
3183   assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
3184   assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
3185           (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
3186          "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
3187   assert((VT == MVT::i32 || VT == MVT::i64) &&
3188          "Expect result type to be i32 or i64 since N is combinable to BFM");
3189   SDLoc DL(N);
3190 
  // Bail out if the BFM already simplifies away a node on the Dst side (i.e.,
  // OrOpd1 was reduced to Dst by stripping an AND).
3192   if (OrOpd1 != Dst)
3193     return false;
3194 
3195   const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
3196   // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
3197   // nodes from Rn (or inserts additional shift node) if BiggerPattern is true.
3198   if (BiggerPattern) {
3199     uint64_t SrcAndImm;
3200     if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) &&
3201         isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) {
3202       // OrOpd0 = AND Src, #Mask
3203       // So BFM simplifies away one AND node from Src and doesn't simplify away
3204       // nodes from Dst. If ORR with left-shifted operand also simplifies away
3205       // one node (from Rd), ORR is better since it has higher throughput and
3206       // smaller latency than BFM on many AArch64 processors (and for the rest
3207       // ORR is at least as good as BFM).
3208       SDValue ShiftedOperand;
3209       uint64_t EncodedShiftImm;
3210       if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
3211                                          EncodedShiftImm)) {
3212         SDValue Ops[] = {OrOpd0, ShiftedOperand,
3213                          CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
3214         CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3215         return true;
3216       }
3217     }
3218     return false;
3219   }
3220 
3221   assert((!BiggerPattern) && "BiggerPattern should be handled above");
3222 
3223   uint64_t ShlImm;
3224   if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
3225     if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
3226       SDValue Ops[] = {
3227           Dst, Src,
3228           CurDAG->getTargetConstant(
3229               AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3230       CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3231       return true;
3232     }
3233 
    // Select the following pattern to an ORR with a left-shifted operand
    // rather than a BFI.
    // %val1 = op ..
    // %val2 = shl %val1, #imm
    // %res = or %val1, %val2
    //
    // If N is selected to be BFI, we know that
    // 1) OrOpd0 would be the operand from which bits are extracted (i.e.,
    //    folded into the BFI), and
    // 2) OrOpd1 would be the destination operand (i.e., preserved).
3242     //
3243     // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
3244     if (OrOpd0.getOperand(0) == OrOpd1) {
3245       SDValue Ops[] = {
3246           OrOpd1, OrOpd1,
3247           CurDAG->getTargetConstant(
3248               AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3249       CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3250       return true;
3251     }
3252   }
3253 
3254   uint64_t SrlImm;
3255   if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
    // Select the following pattern to an ORR with a right-shifted operand
    // rather than a BFXIL.
    // %val1 = op ..
    // %val2 = lshr %val1, #imm
    // %res = or %val1, %val2
    //
    // If N is selected to be BFXIL, we know that
    // 1) OrOpd0 would be the operand from which bits are extracted (i.e.,
    //    folded into the BFXIL), and
    // 2) OrOpd1 would be the destination operand (i.e., preserved).
3264     //
3265     // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
3266     if (OrOpd0.getOperand(0) == OrOpd1) {
3267       SDValue Ops[] = {
3268           OrOpd1, OrOpd1,
3269           CurDAG->getTargetConstant(
3270               AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm), DL, VT)};
3271       CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3272       return true;
3273     }
3274   }
3275 
3276   return false;
3277 }
3278 
3279 static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
3280                                       SelectionDAG *CurDAG) {
  assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
3282 
3283   EVT VT = N->getValueType(0);
3284   if (VT != MVT::i32 && VT != MVT::i64)
3285     return false;
3286 
3287   unsigned BitWidth = VT.getSizeInBits();
3288 
3289   // Because of simplify-demanded-bits in DAGCombine, involved masks may not
3290   // have the expected shape. Try to undo that.
3291 
3292   unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
3293   unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();
3294 
  // Given an OR operation, check if we have the following pattern
  // ubfm c, b, imm, imm2 (or something that does the same job, see
  //                       isBitfieldExtractOp)
  // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
  //                 countTrailingZeros(mask2) == imm2 - imm + 1
  // f = d | c
  // if yes, replace the OR instruction with:
  // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
3303 
3304   // OR is commutative, check all combinations of operand order and values of
3305   // BiggerPattern, i.e.
3306   //     Opd0, Opd1, BiggerPattern=false
3307   //     Opd1, Opd0, BiggerPattern=false
3308   //     Opd0, Opd1, BiggerPattern=true
3309   //     Opd1, Opd0, BiggerPattern=true
3310   // Several of these combinations may match, so check with BiggerPattern=false
3311   // first since that will produce better results by matching more instructions
3312   // and/or inserting fewer extra instructions.
3313   for (int I = 0; I < 4; ++I) {
3314 
3315     SDValue Dst, Src;
3316     unsigned ImmR, ImmS;
3317     bool BiggerPattern = I / 2;
3318     SDValue OrOpd0Val = N->getOperand(I % 2);
3319     SDNode *OrOpd0 = OrOpd0Val.getNode();
3320     SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
3321     SDNode *OrOpd1 = OrOpd1Val.getNode();
3322 
3323     unsigned BFXOpc;
3324     int DstLSB, Width;
3325     if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
3326                             NumberOfIgnoredLowBits, BiggerPattern)) {
3327       // Check that the returned opcode is compatible with the pattern,
3328       // i.e., same type and zero extended (U and not S)
3329       if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
3330           (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
3331         continue;
3332 
3333       // Compute the width of the bitfield insertion
3334       DstLSB = 0;
3335       Width = ImmS - ImmR + 1;
      // FIXME: This constraint is to catch bitfield insertion; we may want to
      // widen the pattern if we want to grab the general bitfield move case.
3339       if (Width <= 0)
3340         continue;
3341 
3342       // If the mask on the insertee is correct, we have a BFXIL operation. We
3343       // can share the ImmR and ImmS values from the already-computed UBFM.
3344     } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
3345                                        BiggerPattern,
3346                                        Src, DstLSB, Width)) {
3347       ImmR = (BitWidth - DstLSB) % BitWidth;
3348       ImmS = Width - 1;
3349     } else
3350       continue;
3351 
3352     // Check the second part of the pattern
3353     EVT VT = OrOpd1Val.getValueType();
3354     assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
3355 
    // Compute the Known Zero for the candidate of the first operand.
    // This allows us to catch more general cases than just looking for
    // AND with imm. Indeed, simplify-demanded-bits may have removed
    // the AND instruction because it proved it was useless.
3360     KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
3361 
3362     // Check if there is enough room for the second operand to appear
3363     // in the first one
3364     APInt BitsToBeInserted =
3365         APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
3366 
3367     if ((BitsToBeInserted & ~Known.Zero) != 0)
3368       continue;
3369 
3370     // Set the first operand
3371     uint64_t Imm;
3372     if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
3373         isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
3374       // In that case, we can eliminate the AND
3375       Dst = OrOpd1->getOperand(0);
3376     else
3377       // Maybe the AND has been removed by simplify-demanded-bits
3378       // or is useful because it discards more bits
3379       Dst = OrOpd1Val;
3380 
3381     // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
3382     // with shifted operand is more efficient.
3383     if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
3384                         BiggerPattern))
3385       return true;
3386 
    // Both parts match.
3388     SDLoc DL(N);
3389     SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
3390                      CurDAG->getTargetConstant(ImmS, DL, VT)};
3391     unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3392     CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3393     return true;
3394   }
3395 
3396   // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
3397   // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
3398   // mask (e.g., 0x000ffff0).
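  // For example (i32): 'or (and X, 0xfff0000f), (and Y, 0x000ffff0)' gives
  // LSB == 4 and Width == 16, i.e. bits [4, 20) of Y are inserted into X.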
3399   uint64_t Mask0Imm, Mask1Imm;
3400   SDValue And0 = N->getOperand(0);
3401   SDValue And1 = N->getOperand(1);
3402   if (And0.hasOneUse() && And1.hasOneUse() &&
3403       isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
3404       isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
3405       APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
3406       (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
3407 
3408     // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
3409     // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
3410     // bits to be inserted.
3411     if (isShiftedMask(Mask0Imm, VT)) {
3412       std::swap(And0, And1);
3413       std::swap(Mask0Imm, Mask1Imm);
3414     }
3415 
3416     SDValue Src = And1->getOperand(0);
3417     SDValue Dst = And0->getOperand(0);
3418     unsigned LSB = llvm::countr_zero(Mask1Imm);
3419     int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();
3420 
3421     // The BFXIL inserts the low-order bits from a source register, so right
3422     // shift the needed bits into place.
3423     SDLoc DL(N);
3424     unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3425     uint64_t LsrImm = LSB;
3426     if (Src->hasOneUse() &&
3427         isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
3428         (LsrImm + LSB) < BitWidth) {
3429       Src = Src->getOperand(0);
3430       LsrImm += LSB;
3431     }
3432 
3433     SDNode *LSR = CurDAG->getMachineNode(
3434         ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
3435         CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
3436 
3437     // BFXIL is an alias of BFM, so translate to BFM operands.
3438     unsigned ImmR = (BitWidth - LSB) % BitWidth;
3439     unsigned ImmS = Width - 1;
3440 
3441     // Create the BFXIL instruction.
3442     SDValue Ops[] = {Dst, SDValue(LSR, 0),
3443                      CurDAG->getTargetConstant(ImmR, DL, VT),
3444                      CurDAG->getTargetConstant(ImmS, DL, VT)};
3445     unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3446     CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3447     return true;
3448   }
3449 
3450   return false;
3451 }
3452 
3453 bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
3454   if (N->getOpcode() != ISD::OR)
3455     return false;
3456 
3457   APInt NUsefulBits;
3458   getUsefulBits(SDValue(N, 0), NUsefulBits);
3459 
  // If none of the bits are useful, just replace the node with UNDEF.
3461   if (!NUsefulBits) {
3462     CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
3463     return true;
3464   }
3465 
3466   if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
3467     return true;
3468 
3469   return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
3470 }
3471 
/// tryBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
/// equivalent of a left shift by a constant amount followed by an AND masking
/// out a contiguous set of bits.
3475 bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
3476   if (N->getOpcode() != ISD::AND)
3477     return false;
3478 
3479   EVT VT = N->getValueType(0);
3480   if (VT != MVT::i32 && VT != MVT::i64)
3481     return false;
3482 
3483   SDValue Op0;
3484   int DstLSB, Width;
3485   if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
3486                                Op0, DstLSB, Width))
3487     return false;
3488 
3489   // ImmR is the rotate right amount.
3490   unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
3491   // ImmS is the most significant bit of the source to be moved.
3492   unsigned ImmS = Width - 1;
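  // For example (i32): DstLSB == 8 and Width == 4 give ImmR == 24 and
  // ImmS == 3, i.e. UBFIZ Wd, Wn, #8, #4.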
3493 
3494   SDLoc DL(N);
3495   SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
3496                    CurDAG->getTargetConstant(ImmS, DL, VT)};
3497   unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3498   CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3499   return true;
3500 }
3501 
3502 /// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
3503 /// variable shift/rotate instructions.
3504 bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3505   EVT VT = N->getValueType(0);
3506 
3507   unsigned Opc;
3508   switch (N->getOpcode()) {
3509   case ISD::ROTR:
3510     Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
3511     break;
3512   case ISD::SHL:
3513     Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
3514     break;
3515   case ISD::SRL:
3516     Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
3517     break;
3518   case ISD::SRA:
3519     Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
3520     break;
3521   default:
3522     return false;
3523   }
3524 
3525   uint64_t Size;
3526   uint64_t Bits;
3527   if (VT == MVT::i32) {
3528     Bits = 5;
3529     Size = 32;
3530   } else if (VT == MVT::i64) {
3531     Bits = 6;
3532     Size = 64;
3533   } else
3534     return false;
3535 
3536   SDValue ShiftAmt = N->getOperand(1);
3537   SDLoc DL(N);
3538   SDValue NewShiftAmt;
3539 
3540   // Skip over an extend of the shift amount.
3541   if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
3542       ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
3543     ShiftAmt = ShiftAmt->getOperand(0);
3544 
3545   if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
3546     SDValue Add0 = ShiftAmt->getOperand(0);
3547     SDValue Add1 = ShiftAmt->getOperand(1);
3548     uint64_t Add0Imm;
3549     uint64_t Add1Imm;
3550     if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
3551       // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
3552       // to avoid the ADD/SUB.
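      // For example (i64): (srl x, (add y, 64)) shifts by (y + 64) & 63, which
      // equals y & 63, so the ADD can be dropped.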
3553       NewShiftAmt = Add0;
3554     } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3555                isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
3556                (Add0Imm % Size == 0)) {
3557       // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
3558       // to generate a NEG instead of a SUB from a constant.
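      // For example (i64): (shl x, (sub 64, y)) shifts by -y modulo 64, so
      // LSLV x, (NEG y) computes the same result.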
3559       unsigned NegOpc;
3560       unsigned ZeroReg;
3561       EVT SubVT = ShiftAmt->getValueType(0);
3562       if (SubVT == MVT::i32) {
3563         NegOpc = AArch64::SUBWrr;
3564         ZeroReg = AArch64::WZR;
3565       } else {
3566         assert(SubVT == MVT::i64);
3567         NegOpc = AArch64::SUBXrr;
3568         ZeroReg = AArch64::XZR;
3569       }
3570       SDValue Zero =
3571           CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3572       MachineSDNode *Neg =
3573           CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
3574       NewShiftAmt = SDValue(Neg, 0);
3575     } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3576                isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
3577       // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3578       // to generate a NOT instead of a SUB from a constant.
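      // For example (i64): (63 - y) == (-y - 1) == ~y modulo 64, so ORN with
      // the zero register produces the required shift amount.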
3579       unsigned NotOpc;
3580       unsigned ZeroReg;
3581       EVT SubVT = ShiftAmt->getValueType(0);
3582       if (SubVT == MVT::i32) {
3583         NotOpc = AArch64::ORNWrr;
3584         ZeroReg = AArch64::WZR;
3585       } else {
3586         assert(SubVT == MVT::i64);
3587         NotOpc = AArch64::ORNXrr;
3588         ZeroReg = AArch64::XZR;
3589       }
3590       SDValue Zero =
3591           CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3592       MachineSDNode *Not =
3593           CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
3594       NewShiftAmt = SDValue(Not, 0);
3595     } else
3596       return false;
3597   } else {
3598     // If the shift amount is masked with an AND, check that the mask covers the
3599     // bits that are implicitly ANDed off by the above opcodes and if so, skip
3600     // the AND.
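    // For example (i64): an AND with 63 (0x3f) has 6 trailing ones, which
    // covers the 6 bits the hardware uses, so the AND is redundant.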
3601     uint64_t MaskImm;
3602     if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
3603         !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
3604       return false;
3605 
3606     if ((unsigned)llvm::countr_one(MaskImm) < Bits)
3607       return false;
3608 
3609     NewShiftAmt = ShiftAmt->getOperand(0);
3610   }
3611 
3612   // Narrow/widen the shift amount to match the size of the shift operation.
3613   if (VT == MVT::i32)
3614     NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
3615   else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
3616     SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
3617     MachineSDNode *Ext = CurDAG->getMachineNode(
3618         AArch64::SUBREG_TO_REG, DL, VT,
3619         CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
3620     NewShiftAmt = SDValue(Ext, 0);
3621   }
3622 
3623   SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
3624   CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3625   return true;
3626 }
3627 
3628 bool
3629 AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
3630                                               unsigned RegWidth) {
3631   APFloat FVal(0.0);
3632   if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
3633     FVal = CN->getValueAPF();
3634   else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
3635     // Some otherwise illegal constants are allowed in this case.
3636     if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
3637         !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
3638       return false;
3639 
3640     ConstantPoolSDNode *CN =
3641         dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
3642     FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
3643   } else
3644     return false;
3645 
3646   // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
3647   // is between 1 and 32 for a destination w-register, or 1 and 64 for an
3648   // x-register.
3649   //
3650   // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
3651   // want THIS_NODE to be 2^fbits. This is much easier to deal with using
3652   // integers.
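  //
  // For example, (fp_to_sint (fmul Val, 16.0)) targeting a w-register
  // corresponds to an FCVTZS with fbits == 4, since 16.0 == 2^4.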
3653   bool IsExact;
3654 
3655   // fbits is between 1 and 64 in the worst-case, which means the fmul
3656   // could have 2^64 as an actual operand. Need 65 bits of precision.
3657   APSInt IntVal(65, true);
3658   FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
3659 
3660   // N.b. isPowerOf2 also checks for > 0.
3661   if (!IsExact || !IntVal.isPowerOf2()) return false;
3662   unsigned FBits = IntVal.logBase2();
3663 
3664   // Checks above should have guaranteed that we haven't lost information in
3665   // finding FBits, but it must still be in range.
3666   if (FBits == 0 || FBits > RegWidth) return false;
3667 
3668   FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
3669   return true;
3670 }
3671 
// Inspects a register string of the form o0:op1:CRn:CRm:op2, extracts the
// fields, obtains their integer values, and combines them into a single value
// to be used in the MRS/MSR instruction.
3675 static int getIntOperandFromRegisterString(StringRef RegString) {
3676   SmallVector<StringRef, 5> Fields;
3677   RegString.split(Fields, ':');
3678 
3679   if (Fields.size() == 1)
3680     return -1;
3681 
  assert(Fields.size() == 5 &&
         "Invalid number of fields in read register string");
3684 
3685   SmallVector<int, 5> Ops;
3686   bool AllIntFields = true;
3687 
3688   for (StringRef Field : Fields) {
3689     unsigned IntField;
3690     AllIntFields &= !Field.getAsInteger(10, IntField);
3691     Ops.push_back(IntField);
3692   }
3693 
3694   assert(AllIntFields &&
3695           "Unexpected non-integer value in special register string.");
3696   (void)AllIntFields;
3697 
  // Need to combine the integer fields of the string into a single value
  // based on the bit encoding of the MRS/MSR instruction.
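  // For instance, the string "3:3:13:0:2" (TPIDR_EL0) yields
  // (3 << 14) | (3 << 11) | (13 << 7) | (0 << 3) | 2 == 0xde82.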
3700   return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
3701          (Ops[3] << 3) | (Ops[4]);
3702 }
3703 
// Lower the read_register intrinsic to an MRS instruction node if the special
// register string argument is either of the form detailed in the ACLE (the
// form described in getIntOperandFromRegisterString) or is a named register
// known by the MRS SysReg mapper.
3708 bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
3709   const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
3710   const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
3711   SDLoc DL(N);
3712 
3713   bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
3714 
3715   unsigned Opcode64Bit = AArch64::MRS;
3716   int Imm = getIntOperandFromRegisterString(RegString->getString());
3717   if (Imm == -1) {
    // No match; use the sysreg mapper to map the remaining possible strings to
    // the value for the register to be used for the instruction operand.
3720     const auto *TheReg =
3721         AArch64SysReg::lookupSysRegByName(RegString->getString());
3722     if (TheReg && TheReg->Readable &&
3723         TheReg->haveFeatures(Subtarget->getFeatureBits()))
3724       Imm = TheReg->Encoding;
3725     else
3726       Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
3727 
3728     if (Imm == -1) {
3729       // Still no match, see if this is "pc" or give up.
3730       if (!ReadIs128Bit && RegString->getString() == "pc") {
3731         Opcode64Bit = AArch64::ADR;
3732         Imm = 0;
3733       } else {
3734         return false;
3735       }
3736     }
3737   }
3738 
3739   SDValue InChain = N->getOperand(0);
3740   SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
3741   if (!ReadIs128Bit) {
3742     CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
3743                          {SysRegImm, InChain});
3744   } else {
3745     SDNode *MRRS = CurDAG->getMachineNode(
3746         AArch64::MRRS, DL,
3747         {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
3748         {SysRegImm, InChain});
3749 
    // Sysreg pairs are not affected by endianness; the even register always
    // contains the low half of the value.
3752     SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
3753                                                 SDValue(MRRS, 0));
3754     SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
3755                                                 SDValue(MRRS, 0));
3756     SDValue OutChain = SDValue(MRRS, 1);
3757 
3758     ReplaceUses(SDValue(N, 0), Lo);
3759     ReplaceUses(SDValue(N, 1), Hi);
3760     ReplaceUses(SDValue(N, 2), OutChain);
3761   };
3762   return true;
3763 }
3764 
// Lower the write_register intrinsic to an MSR instruction node if the special
// register string argument is either of the form detailed in the ACLE (the
// form described in getIntOperandFromRegisterString) or is a named register
// known by the MSR SysReg mapper.
3769 bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
3770   const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
3771   const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
3772   SDLoc DL(N);
3773 
3774   bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;
3775 
3776   if (!WriteIs128Bit) {
    // Check if the register was one of those allowed as the pstatefield value
    // in the MSR (immediate) instruction. To accept the values allowed in the
    // pstatefield for the MSR (immediate) instruction, we also require that an
    // immediate value has been provided as an argument; we know this is the
    // case, as it has been ensured by semantic checking.
3782     auto trySelectPState = [&](auto PMapper, unsigned State) {
3783       if (PMapper) {
3784         assert(isa<ConstantSDNode>(N->getOperand(2)) &&
3785                "Expected a constant integer expression.");
3786         unsigned Reg = PMapper->Encoding;
3787         uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
3788         CurDAG->SelectNodeTo(
3789             N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
3790             CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
3791         return true;
3792       }
3793       return false;
3794     };
3795 
3796     if (trySelectPState(
3797             AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
3798             AArch64::MSRpstateImm4))
3799       return true;
3800     if (trySelectPState(
3801             AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
3802             AArch64::MSRpstateImm1))
3803       return true;
3804   }
3805 
3806   int Imm = getIntOperandFromRegisterString(RegString->getString());
3807   if (Imm == -1) {
3808     // Use the sysreg mapper to attempt to map the remaining possible strings
3809     // to the value for the register to be used for the MSR (register)
3810     // instruction operand.
3811     auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
3812     if (TheReg && TheReg->Writeable &&
3813         TheReg->haveFeatures(Subtarget->getFeatureBits()))
3814       Imm = TheReg->Encoding;
3815     else
3816       Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
3817 
3818     if (Imm == -1)
3819       return false;
3820   }
3821 
3822   SDValue InChain = N->getOperand(0);
3823   if (!WriteIs128Bit) {
3824     CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
3825                          CurDAG->getTargetConstant(Imm, DL, MVT::i32),
3826                          N->getOperand(2), InChain);
3827   } else {
    // No endian swap. The lower half always goes into the even subreg, and the
    // higher half always into the odd subreg.
3830     SDNode *Pair = CurDAG->getMachineNode(
3831         TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
3832         {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
3833                                    MVT::i32),
3834          N->getOperand(2),
3835          CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
3836          N->getOperand(3),
3837          CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});
3838 
3839     CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
3840                          CurDAG->getTargetConstant(Imm, DL, MVT::i32),
3841                          SDValue(Pair, 0), InChain);
3842   }
3843 
3844   return true;
3845 }
3846 
3847 /// We've got special pseudo-instructions for these
3848 bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
3849   unsigned Opcode;
3850   EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
3851 
3852   // Leave IR for LSE if subtarget supports it.
3853   if (Subtarget->hasLSE()) return false;
3854 
3855   if (MemTy == MVT::i8)
3856     Opcode = AArch64::CMP_SWAP_8;
3857   else if (MemTy == MVT::i16)
3858     Opcode = AArch64::CMP_SWAP_16;
3859   else if (MemTy == MVT::i32)
3860     Opcode = AArch64::CMP_SWAP_32;
3861   else if (MemTy == MVT::i64)
3862     Opcode = AArch64::CMP_SWAP_64;
3863   else
3864     llvm_unreachable("Unknown AtomicCmpSwap type");
3865 
3866   MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
3867   SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
3868                    N->getOperand(0)};
3869   SDNode *CmpSwap = CurDAG->getMachineNode(
3870       Opcode, SDLoc(N),
3871       CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
3872 
3873   MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
3874   CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
3875 
3876   ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
3877   ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
3878   CurDAG->RemoveDeadNode(N);
3879 
3880   return true;
3881 }
3882 
3883 bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
3884                                              SDValue &Shift) {
3885   if (!isa<ConstantSDNode>(N))
3886     return false;
3887 
3888   SDLoc DL(N);
3889   uint64_t Val = cast<ConstantSDNode>(N)
3890                      ->getAPIntValue()
3891                      .trunc(VT.getFixedSizeInBits())
3892                      .getZExtValue();
3893 
3894   switch (VT.SimpleTy) {
3895   case MVT::i8:
3896     // All immediates are supported.
3897     Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
3898     Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
3899     return true;
3900   case MVT::i16:
3901   case MVT::i32:
3902   case MVT::i64:
    // Support 8-bit unsigned immediates.
3904     if (Val <= 255) {
3905       Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
3906       Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
3907       return true;
3908     }
    // Support 16-bit unsigned immediates that are a multiple of 256.
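    // For example, 0x2000 is selected as Imm == 0x20 with Shift == 8.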
3910     if (Val <= 65280 && Val % 256 == 0) {
3911       Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
3912       Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
3913       return true;
3914     }
3915     break;
3916   default:
3917     break;
3918   }
3919 
3920   return false;
3921 }
3922 
3923 bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
3924                                              SDValue &Shift) {
3925   if (!isa<ConstantSDNode>(N))
3926     return false;
3927 
3928   SDLoc DL(N);
3929   int64_t Val = cast<ConstantSDNode>(N)
3930                     ->getAPIntValue()
3931                     .trunc(VT.getFixedSizeInBits())
3932                     .getSExtValue();
3933 
3934   switch (VT.SimpleTy) {
3935   case MVT::i8:
3936     // All immediates are supported.
3937     Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
3938     Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
3939     return true;
3940   case MVT::i16:
3941   case MVT::i32:
3942   case MVT::i64:
    // Support 8-bit signed immediates.
3944     if (Val >= -128 && Val <= 127) {
3945       Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
3946       Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
3947       return true;
3948     }
    // Support 16-bit signed immediates that are a multiple of 256.
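    // For example, -1024 is selected as Imm == 0xfc (-4 truncated to 8 bits)
    // with Shift == 8.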
3950     if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) {
3951       Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
3952       Imm = CurDAG->getTargetConstant((Val >> 8) & 0xFF, DL, MVT::i32);
3953       return true;
3954     }
3955     break;
3956   default:
3957     break;
3958   }
3959 
3960   return false;
3961 }
3962 
3963 bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
3964   if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
3965     int64_t ImmVal = CNode->getSExtValue();
3966     SDLoc DL(N);
3967     if (ImmVal >= -128 && ImmVal < 128) {
3968       Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
3969       return true;
3970     }
3971   }
3972   return false;
3973 }
3974 
3975 bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
3976   if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
3977     uint64_t ImmVal = CNode->getZExtValue();
3978 
3979     switch (VT.SimpleTy) {
3980     case MVT::i8:
3981       ImmVal &= 0xFF;
3982       break;
3983     case MVT::i16:
3984       ImmVal &= 0xFFFF;
3985       break;
3986     case MVT::i32:
3987       ImmVal &= 0xFFFFFFFF;
3988       break;
3989     case MVT::i64:
3990       break;
3991     default:
3992       llvm_unreachable("Unexpected type");
3993     }
3994 
3995     if (ImmVal < 256) {
3996       Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
3997       return true;
3998     }
3999   }
4000   return false;
4001 }
4002 
4003 bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
4004                                               bool Invert) {
4005   if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4006     uint64_t ImmVal = CNode->getZExtValue();
4007     SDLoc DL(N);
4008 
4009     if (Invert)
4010       ImmVal = ~ImmVal;
4011 
    // Replicate the immediate across 64 bits according to the element size.
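    // For example, for MVT::i16 an immediate of 0x00ff becomes
    // 0x00ff00ff00ff00ff before being encoded as a 64-bit logical immediate.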
4013     switch (VT.SimpleTy) {
4014     case MVT::i8:
4015       ImmVal &= 0xFF;
4016       ImmVal |= ImmVal << 8;
4017       ImmVal |= ImmVal << 16;
4018       ImmVal |= ImmVal << 32;
4019       break;
4020     case MVT::i16:
4021       ImmVal &= 0xFFFF;
4022       ImmVal |= ImmVal << 16;
4023       ImmVal |= ImmVal << 32;
4024       break;
4025     case MVT::i32:
4026       ImmVal &= 0xFFFFFFFF;
4027       ImmVal |= ImmVal << 32;
4028       break;
4029     case MVT::i64:
4030       break;
4031     default:
4032       llvm_unreachable("Unexpected type");
4033     }
4034 
4035     uint64_t encoding;
4036     if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) {
4037       Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64);
4038       return true;
4039     }
4040   }
4041   return false;
4042 }
4043 
4044 // SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
4045 // Rather than attempt to normalise everything we can sometimes saturate the
4046 // shift amount during selection. This function also allows for consistent
4047 // isel patterns by ensuring the resulting "Imm" node is of the i32 type
4048 // required by the instructions.
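// For example, for a byte-element shift with High == 8, an out-of-range
// immediate such as 200 is clamped to 8 when AllowSaturation is true (and
// rejected otherwise).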
4049 bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
4050                                             uint64_t High, bool AllowSaturation,
4051                                             SDValue &Imm) {
4052   if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
4053     uint64_t ImmVal = CN->getZExtValue();
4054 
4055     // Reject shift amounts that are too small.
4056     if (ImmVal < Low)
4057       return false;
4058 
4059     // Reject or saturate shift amounts that are too big.
4060     if (ImmVal > High) {
4061       if (!AllowSaturation)
4062         return false;
4063       ImmVal = High;
4064     }
4065 
4066     Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4067     return true;
4068   }
4069 
4070   return false;
4071 }
4072 
4073 bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
4074   // tagp(FrameIndex, IRGstack, tag_offset):
4075   // since the offset between FrameIndex and IRGstack is a compile-time
4076   // constant, this can be lowered to a single ADDG instruction.
4077   if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
4078     return false;
4079   }
4080 
4081   SDValue IRG_SP = N->getOperand(2);
4082   if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4083       cast<ConstantSDNode>(IRG_SP->getOperand(1))->getZExtValue() !=
4084           Intrinsic::aarch64_irg_sp) {
4085     return false;
4086   }
4087 
4088   const TargetLowering *TLI = getTargetLowering();
4089   SDLoc DL(N);
4090   int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
4091   SDValue FiOp = CurDAG->getTargetFrameIndex(
4092       FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4093   int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
4094 
4095   SDNode *Out = CurDAG->getMachineNode(
4096       AArch64::TAGPstack, DL, MVT::i64,
4097       {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
4098        CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4099   ReplaceNode(N, Out);
4100   return true;
4101 }
4102 
4103 void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
4104   assert(isa<ConstantSDNode>(N->getOperand(3)) &&
4105          "llvm.aarch64.tagp third argument must be an immediate");
4106   if (trySelectStackSlotTagP(N))
4107     return;
  // FIXME: the above applies whenever the offset between Op1 and Op2 is a
  // compile-time constant, not just for stack allocations.
4110 
4111   // General case for unrelated pointers in Op1 and Op2.
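  // Roughly: SUBP computes the tag-ignoring address difference Op1 - Op2,
  // adding Op2 back reconstructs Op1's address under Op2's tag, and ADDG then
  // applies TagOffset to that tag.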
4112   SDLoc DL(N);
4113   int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
4114   SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
4115                                       {N->getOperand(1), N->getOperand(2)});
4116   SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
4117                                       {SDValue(N1, 0), N->getOperand(2)});
4118   SDNode *N3 = CurDAG->getMachineNode(
4119       AArch64::ADDG, DL, MVT::i64,
4120       {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
4121        CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4122   ReplaceNode(N, N3);
4123 }
4124 
4125 bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
4126   assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");
4127 
4128   // Bail when not a "cast" like insert_subvector.
4129   if (cast<ConstantSDNode>(N->getOperand(2))->getZExtValue() != 0)
4130     return false;
4131   if (!N->getOperand(0).isUndef())
4132     return false;
4133 
4134   // Bail when normal isel should do the job.
4135   EVT VT = N->getValueType(0);
4136   EVT InVT = N->getOperand(1).getValueType();
4137   if (VT.isFixedLengthVector() || InVT.isScalableVector())
4138     return false;
4139   if (InVT.getSizeInBits() <= 128)
4140     return false;
4141 
4142   // NOTE: We can only get here when doing fixed length SVE code generation.
4143   // We do manual selection because the types involved are not linked to real
4144   // registers (despite being legal) and must be coerced into SVE registers.
4145 
4146   assert(VT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock &&
4147          "Expected to insert into a packed scalable vector!");
4148 
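  // The insert is a no-op reinterpretation, so a COPY_TO_REGCLASS into the
  // ZPR class is sufficient; no data movement is required.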
4149   SDLoc DL(N);
4150   auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4151   ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4152                                         N->getOperand(1), RC));
4153   return true;
4154 }
4155 
4156 bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
4157   assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");
4158 
4159   // Bail when not a "cast" like extract_subvector.
4160   if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 0)
4161     return false;
4162 
4163   // Bail when normal isel can do the job.
4164   EVT VT = N->getValueType(0);
4165   EVT InVT = N->getOperand(0).getValueType();
4166   if (VT.isScalableVector() || InVT.isFixedLengthVector())
4167     return false;
4168   if (VT.getSizeInBits() <= 128)
4169     return false;
4170 
4171   // NOTE: We can only get here when doing fixed length SVE code generation.
4172   // We do manual selection because the types involved are not linked to real
4173   // registers (despite being legal) and must be coerced into SVE registers.
4174 
4175   assert(InVT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock &&
4176          "Expected to extract from a packed scalable vector!");
4177 
4178   SDLoc DL(N);
4179   auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4180   ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4181                                         N->getOperand(0), RC));
4182   return true;
4183 }
4184 
4185 void AArch64DAGToDAGISel::Select(SDNode *Node) {
4186   // If we have a custom node, we have already selected it!
4187   if (Node->isMachineOpcode()) {
4188     LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
4189     Node->setNodeId(-1);
4190     return;
4191   }
4192 
4193   // A few cases need custom selection.
4194   EVT VT = Node->getValueType(0);
4195 
4196   switch (Node->getOpcode()) {
4197   default:
4198     break;
4199 
4200   case ISD::ATOMIC_CMP_SWAP:
4201     if (SelectCMP_SWAP(Node))
4202       return;
4203     break;
4204 
4205   case ISD::READ_REGISTER:
4206   case AArch64ISD::MRRS:
4207     if (tryReadRegister(Node))
4208       return;
4209     break;
4210 
4211   case ISD::WRITE_REGISTER:
4212   case AArch64ISD::MSRR:
4213     if (tryWriteRegister(Node))
4214       return;
4215     break;
4216 
4217   case ISD::LOAD: {
4218     // Try to select as an indexed load. Fall through to normal processing
4219     // if we can't.
4220     if (tryIndexedLoad(Node))
4221       return;
4222     break;
4223   }
4224 
4225   case ISD::SRL:
4226   case ISD::AND:
4227   case ISD::SRA:
4228   case ISD::SIGN_EXTEND_INREG:
4229     if (tryBitfieldExtractOp(Node))
4230       return;
4231     if (tryBitfieldInsertInZeroOp(Node))
4232       return;
4233     [[fallthrough]];
4234   case ISD::ROTR:
4235   case ISD::SHL:
4236     if (tryShiftAmountMod(Node))
4237       return;
4238     break;
4239 
4240   case ISD::SIGN_EXTEND:
4241     if (tryBitfieldExtractOpFromSExt(Node))
4242       return;
4243     break;
4244 
4245   case ISD::OR:
4246     if (tryBitfieldInsertOp(Node))
4247       return;
4248     break;
4249 
4250   case ISD::EXTRACT_SUBVECTOR: {
4251     if (trySelectCastScalableToFixedLengthVector(Node))
4252       return;
4253     break;
4254   }
4255 
4256   case ISD::INSERT_SUBVECTOR: {
4257     if (trySelectCastFixedLengthToScalableVector(Node))
4258       return;
4259     break;
4260   }
4261 
4262   case ISD::Constant: {
4263     // Materialize zero constants as copies from WZR/XZR.  This allows
4264     // the coalescer to propagate these into other instructions.
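    // For example, a store of zero can then use WZR/XZR directly (e.g.
    // "str wzr, [sp, #12]") instead of first materializing zero in a GPR.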
4265     ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
4266     if (ConstNode->isZero()) {
4267       if (VT == MVT::i32) {
4268         SDValue New = CurDAG->getCopyFromReg(
4269             CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
4270         ReplaceNode(Node, New.getNode());
4271         return;
4272       } else if (VT == MVT::i64) {
4273         SDValue New = CurDAG->getCopyFromReg(
4274             CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
4275         ReplaceNode(Node, New.getNode());
4276         return;
4277       }
4278     }
4279     break;
4280   }
4281 
4282   case ISD::FrameIndex: {
4283     // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
4284     int FI = cast<FrameIndexSDNode>(Node)->getIndex();
4285     unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
4286     const TargetLowering *TLI = getTargetLowering();
4287     SDValue TFI = CurDAG->getTargetFrameIndex(
4288         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4289     SDLoc DL(Node);
4290     SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
4291                       CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
4292     CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
4293     return;
4294   }
4295   case ISD::INTRINSIC_W_CHAIN: {
4296     unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
4297     switch (IntNo) {
4298     default:
4299       break;
4300     case Intrinsic::aarch64_ldaxp:
4301     case Intrinsic::aarch64_ldxp: {
4302       unsigned Op =
4303           IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
4304       SDValue MemAddr = Node->getOperand(2);
4305       SDLoc DL(Node);
4306       SDValue Chain = Node->getOperand(0);
4307 
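      // LDAXP/LDXP define two i64 results plus a chain, matching the
      // intrinsic's pair of 64-bit return values.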
4308       SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
4309                                           MVT::Other, MemAddr, Chain);
4310 
4311       // Transfer memoperands.
4312       MachineMemOperand *MemOp =
4313           cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4314       CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4315       ReplaceNode(Node, Ld);
4316       return;
4317     }
4318     case Intrinsic::aarch64_stlxp:
4319     case Intrinsic::aarch64_stxp: {
4320       unsigned Op =
4321           IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
4322       SDLoc DL(Node);
4323       SDValue Chain = Node->getOperand(0);
4324       SDValue ValLo = Node->getOperand(2);
4325       SDValue ValHi = Node->getOperand(3);
4326       SDValue MemAddr = Node->getOperand(4);
4327 
4328       // Operand order expected by STLXP/STXP: value pair, address, then chain.
4329       SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
4330 
4331       SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
4332       // Transfer memoperands.
4333       MachineMemOperand *MemOp =
4334           cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4335       CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4336 
4337       ReplaceNode(Node, St);
4338       return;
4339     }
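    // The NEON structured-load intrinsics below are selected purely on the
    // result vector type: 64-bit vectors use the D-register opcodes (dsub0)
    // and 128-bit vectors use the Q-register opcodes (qsub0).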
4340     case Intrinsic::aarch64_neon_ld1x2:
4341       if (VT == MVT::v8i8) {
4342         SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
4343         return;
4344       } else if (VT == MVT::v16i8) {
4345         SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
4346         return;
4347       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4348         SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
4349         return;
4350       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4351         SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
4352         return;
4353       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4354         SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
4355         return;
4356       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4357         SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
4358         return;
4359       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4360         SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4361         return;
4362       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4363         SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
4364         return;
4365       }
4366       break;
4367     case Intrinsic::aarch64_neon_ld1x3:
4368       if (VT == MVT::v8i8) {
4369         SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
4370         return;
4371       } else if (VT == MVT::v16i8) {
4372         SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
4373         return;
4374       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4375         SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
4376         return;
4377       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4378         SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
4379         return;
4380       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4381         SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
4382         return;
4383       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4384         SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
4385         return;
4386       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4387         SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
4388         return;
4389       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4390         SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
4391         return;
4392       }
4393       break;
4394     case Intrinsic::aarch64_neon_ld1x4:
4395       if (VT == MVT::v8i8) {
4396         SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
4397         return;
4398       } else if (VT == MVT::v16i8) {
4399         SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
4400         return;
4401       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4402         SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
4403         return;
4404       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4405         SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
4406         return;
4407       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4408         SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
4409         return;
4410       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4411         SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
4412         return;
4413       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4414         SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
4415         return;
4416       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4417         SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
4418         return;
4419       }
4420       break;
4421     case Intrinsic::aarch64_neon_ld2:
4422       if (VT == MVT::v8i8) {
4423         SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
4424         return;
4425       } else if (VT == MVT::v16i8) {
4426         SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
4427         return;
4428       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4429         SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
4430         return;
4431       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4432         SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
4433         return;
4434       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4435         SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
4436         return;
4437       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4438         SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
4439         return;
4440       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
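        // A single-element vector needs no de-interleaving, so LD2 of v1i64
        // degenerates to an LD1 of two consecutive D registers.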
4441         SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4442         return;
4443       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4444         SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
4445         return;
4446       }
4447       break;
4448     case Intrinsic::aarch64_neon_ld3:
4449       if (VT == MVT::v8i8) {
4450         SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
4451         return;
4452       } else if (VT == MVT::v16i8) {
4453         SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
4454         return;
4455       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4456         SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
4457         return;
4458       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4459         SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
4460         return;
4461       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4462         SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
4463         return;
4464       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4465         SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
4466         return;
4467       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4468         SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
4469         return;
4470       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4471         SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
4472         return;
4473       }
4474       break;
4475     case Intrinsic::aarch64_neon_ld4:
4476       if (VT == MVT::v8i8) {
4477         SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
4478         return;
4479       } else if (VT == MVT::v16i8) {
4480         SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
4481         return;
4482       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4483         SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
4484         return;
4485       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4486         SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
4487         return;
4488       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4489         SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
4490         return;
4491       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4492         SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
4493         return;
4494       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4495         SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
4496         return;
4497       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4498         SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
4499         return;
4500       }
4501       break;
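    // The LDxR ("load and replicate") intrinsics use the same type dispatch,
    // broadcasting each loaded structure element to all lanes of the result.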
4502     case Intrinsic::aarch64_neon_ld2r:
4503       if (VT == MVT::v8i8) {
4504         SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
4505         return;
4506       } else if (VT == MVT::v16i8) {
4507         SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
4508         return;
4509       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4510         SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
4511         return;
4512       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4513         SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
4514         return;
4515       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4516         SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
4517         return;
4518       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4519         SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
4520         return;
4521       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4522         SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
4523         return;
4524       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4525         SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
4526         return;
4527       }
4528       break;
4529     case Intrinsic::aarch64_neon_ld3r:
4530       if (VT == MVT::v8i8) {
4531         SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
4532         return;
4533       } else if (VT == MVT::v16i8) {
4534         SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
4535         return;
4536       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4537         SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
4538         return;
4539       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4540         SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
4541         return;
4542       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4543         SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
4544         return;
4545       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4546         SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
4547         return;
4548       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4549         SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
4550         return;
4551       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4552         SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
4553         return;
4554       }
4555       break;
4556     case Intrinsic::aarch64_neon_ld4r:
4557       if (VT == MVT::v8i8) {
4558         SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
4559         return;
4560       } else if (VT == MVT::v16i8) {
4561         SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
4562         return;
4563       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4564         SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
4565         return;
4566       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4567         SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
4568         return;
4569       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4570         SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
4571         return;
4572       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4573         SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
4574         return;
4575       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4576         SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
4577         return;
4578       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4579         SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
4580         return;
4581       }
4582       break;
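    // The lane-indexed variants depend only on the element size, so 64-bit
    // and 128-bit vectors with the same element type share one opcode.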
4583     case Intrinsic::aarch64_neon_ld2lane:
4584       if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4585         SelectLoadLane(Node, 2, AArch64::LD2i8);
4586         return;
4587       } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4588                  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4589         SelectLoadLane(Node, 2, AArch64::LD2i16);
4590         return;
4591       } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4592                  VT == MVT::v2f32) {
4593         SelectLoadLane(Node, 2, AArch64::LD2i32);
4594         return;
4595       } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4596                  VT == MVT::v1f64) {
4597         SelectLoadLane(Node, 2, AArch64::LD2i64);
4598         return;
4599       }
4600       break;
4601     case Intrinsic::aarch64_neon_ld3lane:
4602       if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4603         SelectLoadLane(Node, 3, AArch64::LD3i8);
4604         return;
4605       } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4606                  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4607         SelectLoadLane(Node, 3, AArch64::LD3i16);
4608         return;
4609       } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4610                  VT == MVT::v2f32) {
4611         SelectLoadLane(Node, 3, AArch64::LD3i32);
4612         return;
4613       } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4614                  VT == MVT::v1f64) {
4615         SelectLoadLane(Node, 3, AArch64::LD3i64);
4616         return;
4617       }
4618       break;
4619     case Intrinsic::aarch64_neon_ld4lane:
4620       if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4621         SelectLoadLane(Node, 4, AArch64::LD4i8);
4622         return;
4623       } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4624                  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4625         SelectLoadLane(Node, 4, AArch64::LD4i16);
4626         return;
4627       } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4628                  VT == MVT::v2f32) {
4629         SelectLoadLane(Node, 4, AArch64::LD4i32);
4630         return;
4631       } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4632                  VT == MVT::v1f64) {
4633         SelectLoadLane(Node, 4, AArch64::LD4i64);
4634         return;
4635       }
4636       break;
4637     case Intrinsic::aarch64_ld64b:
4638       SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
4639       return;
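    // The SVE structured loads below dispatch on element size (B/H/S/D); each
    // call provides both the reg+imm and reg+reg addressing-mode opcodes.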
4640     case Intrinsic::aarch64_sve_ld2_sret: {
4641       if (VT == MVT::nxv16i8) {
4642         SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
4643                              true);
4644         return;
4645       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4646                  VT == MVT::nxv8bf16) {
4647         SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
4648                              true);
4649         return;
4650       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4651         SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
4652                              true);
4653         return;
4654       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4655         SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
4656                              true);
4657         return;
4658       }
4659       break;
4660     }
4661     case Intrinsic::aarch64_sve_ld1_pn_x2: {
4662       if (VT == MVT::nxv16i8) {
4663         SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM, AArch64::LD1B_2Z);
4664         return;
4665       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4666                  VT == MVT::nxv8bf16) {
4667         SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM, AArch64::LD1H_2Z);
4668         return;
4669       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4670         SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM, AArch64::LD1W_2Z);
4671         return;
4672       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4673         SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM, AArch64::LD1D_2Z);
4674         return;
4675       }
4676       break;
4677     }
4678     case Intrinsic::aarch64_sve_ld1_pn_x4: {
4679       if (VT == MVT::nxv16i8) {
4680         SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM, AArch64::LD1B_4Z);
4681         return;
4682       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4683                  VT == MVT::nxv8bf16) {
4684         SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM, AArch64::LD1H_4Z);
4685         return;
4686       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4687         SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM, AArch64::LD1W_4Z);
4688         return;
4689       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4690         SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM, AArch64::LD1D_4Z);
4691         return;
4692       }
4693       break;
4694     }
4695     case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
4696       if (VT == MVT::nxv16i8) {
4697         SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM, AArch64::LDNT1B_2Z);
4698         return;
4699       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4700                  VT == MVT::nxv8bf16) {
4701         SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM, AArch64::LDNT1H_2Z);
4702         return;
4703       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4704         SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM, AArch64::LDNT1W_2Z);
4705         return;
4706       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4707         SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM, AArch64::LDNT1D_2Z);
4708         return;
4709       }
4710       break;
4711     }
4712     case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
4713       if (VT == MVT::nxv16i8) {
4714         SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM, AArch64::LDNT1B_4Z);
4715         return;
4716       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4717                  VT == MVT::nxv8bf16) {
4718         SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM, AArch64::LDNT1H_4Z);
4719         return;
4720       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4721         SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM, AArch64::LDNT1W_4Z);
4722         return;
4723       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4724         SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM, AArch64::LDNT1D_4Z);
4725         return;
4726       }
4727       break;
4728     }
4729     case Intrinsic::aarch64_sve_ld3_sret: {
4730       if (VT == MVT::nxv16i8) {
4731         SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
4732                              true);
4733         return;
4734       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4735                  VT == MVT::nxv8bf16) {
4736         SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
4737                              true);
4738         return;
4739       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4740         SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
4741                              true);
4742         return;
4743       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4744         SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
4745                              true);
4746         return;
4747       }
4748       break;
4749     }
4750     case Intrinsic::aarch64_sve_ld4_sret: {
4751       if (VT == MVT::nxv16i8) {
4752         SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
4753                              true);
4754         return;
4755       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4756                  VT == MVT::nxv8bf16) {
4757         SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
4758                              true);
4759         return;
4760       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4761         SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
4762                              true);
4763         return;
4764       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4765         SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
4766                              true);
4767         return;
4768       }
4769       break;
4770     }
4771     case Intrinsic::aarch64_sme_read_hor_vg2: {
4772       if (VT == MVT::nxv16i8) {
4773         SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
4774                                      AArch64::MOVA_2ZMXI_H_B);
4775         return;
4776       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4777                  VT == MVT::nxv8bf16) {
4778         SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
4779                                     AArch64::MOVA_2ZMXI_H_H);
4780         return;
4781       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4782         SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
4783                                     AArch64::MOVA_2ZMXI_H_S);
4784         return;
4785       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4786         SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
4787                                     AArch64::MOVA_2ZMXI_H_D);
4788         return;
4789       }
4790       break;
4791     }
4792     case Intrinsic::aarch64_sme_read_ver_vg2: {
4793       if (VT == MVT::nxv16i8) {
4794         SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
4795                                      AArch64::MOVA_2ZMXI_V_B);
4796         return;
4797       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4798                  VT == MVT::nxv8bf16) {
4799         SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
4800                                     AArch64::MOVA_2ZMXI_V_H);
4801         return;
4802       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4803         SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
4804                                     AArch64::MOVA_2ZMXI_V_S);
4805         return;
4806       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4807         SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
4808                                     AArch64::MOVA_2ZMXI_V_D);
4809         return;
4810       }
4811       break;
4812     }
4813     case Intrinsic::aarch64_sme_read_hor_vg4: {
4814       if (VT == MVT::nxv16i8) {
4815         SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
4816                                      AArch64::MOVA_4ZMXI_H_B);
4817         return;
4818       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4819                  VT == MVT::nxv8bf16) {
4820         SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
4821                                     AArch64::MOVA_4ZMXI_H_H);
4822         return;
4823       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4824         SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0,
4825                                     AArch64::MOVA_4ZMXI_H_S);
4826         return;
4827       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4828         SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0,
4829                                     AArch64::MOVA_4ZMXI_H_D);
4830         return;
4831       }
4832       break;
4833     }
4834     case Intrinsic::aarch64_sme_read_ver_vg4: {
4835       if (VT == MVT::nxv16i8) {
4836         SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
4837                                      AArch64::MOVA_4ZMXI_V_B);
4838         return;
4839       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4840                  VT == MVT::nxv8bf16) {
4841         SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
4842                                     AArch64::MOVA_4ZMXI_V_H);
4843         return;
4844       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4845         SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0,
4846                                     AArch64::MOVA_4ZMXI_V_S);
4847         return;
4848       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4849         SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0,
4850                                     AArch64::MOVA_4ZMXI_V_D);
4851         return;
4852       }
4853       break;
4854     }
4855     case Intrinsic::aarch64_sme_read_vg1x2: {
4856       SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA,
4857                                   AArch64::MOVA_VG2_2ZMXI);
4858       return;
4859     }
4860     case Intrinsic::aarch64_sme_read_vg1x4: {
4861       SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA,
4862                                   AArch64::MOVA_VG4_4ZMXI);
4863       return;
4864     }
4865     case Intrinsic::swift_async_context_addr: {
4866       SDLoc DL(Node);
4867       SDValue Chain = Node->getOperand(0);
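      // The Swift async context is stored in the slot immediately below the
      // saved frame pointer, so its address is FP - 8.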
4868       SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
4869       SDValue Res = SDValue(
4870           CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
4871                                  CurDAG->getTargetConstant(8, DL, MVT::i32),
4872                                  CurDAG->getTargetConstant(0, DL, MVT::i32)),
4873           0);
4874       ReplaceUses(SDValue(Node, 0), Res);
4875       ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1));
4876       CurDAG->RemoveDeadNode(Node);
4877 
4878       auto &MF = CurDAG->getMachineFunction();
4879       MF.getFrameInfo().setFrameAddressIsTaken(true);
4880       MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
4881       return;
4882     }
4883     }
4884   } break;
4885   case ISD::INTRINSIC_WO_CHAIN: {
4886     unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
4887     switch (IntNo) {
4888     default:
4889       break;
4890     case Intrinsic::aarch64_tagp:
4891       SelectTagP(Node);
4892       return;
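    // For the NEON table lookups below, the opcode is chosen by the number of
    // table registers and the result width; the trailing flag selects TBX
    // (which merges into the existing destination) rather than TBL.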
4893     case Intrinsic::aarch64_neon_tbl2:
4894       SelectTable(Node, 2,
4895                   VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
4896                   false);
4897       return;
4898     case Intrinsic::aarch64_neon_tbl3:
4899       SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
4900                                            : AArch64::TBLv16i8Three,
4901                   false);
4902       return;
4903     case Intrinsic::aarch64_neon_tbl4:
4904       SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
4905                                            : AArch64::TBLv16i8Four,
4906                   false);
4907       return;
4908     case Intrinsic::aarch64_neon_tbx2:
4909       SelectTable(Node, 2,
4910                   VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
4911                   true);
4912       return;
4913     case Intrinsic::aarch64_neon_tbx3:
4914       SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
4915                                            : AArch64::TBXv16i8Three,
4916                   true);
4917       return;
4918     case Intrinsic::aarch64_neon_tbx4:
4919       SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
4920                                            : AArch64::TBXv16i8Four,
4921                   true);
4922       return;
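    // Each multi-vector intrinsic below picks its opcode from the element
    // type via SelectOpcodeFromVT (ordered B/H/S/D, with 0 marking a form
    // that does not exist) and forwards to a shared selection helper.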
4923     case Intrinsic::aarch64_sve_srshl_single_x2:
4924       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
4925               Node->getValueType(0),
4926               {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H,
4927                AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D}))
4928         SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
4929       return;
4930     case Intrinsic::aarch64_sve_srshl_single_x4:
4931       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
4932               Node->getValueType(0),
4933               {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H,
4934                AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
4935         SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
4936       return;
4937     case Intrinsic::aarch64_sve_urshl_single_x2:
4938       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
4939               Node->getValueType(0),
4940               {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H,
4941                AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D}))
4942         SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
4943       return;
4944     case Intrinsic::aarch64_sve_urshl_single_x4:
4945       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
4946               Node->getValueType(0),
4947               {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H,
4948                AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D}))
4949         SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
4950       return;
4951     case Intrinsic::aarch64_sve_srshl_x2:
4952       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
4953               Node->getValueType(0),
4954               {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H,
4955                AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D}))
4956         SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
4957       return;
4958     case Intrinsic::aarch64_sve_srshl_x4:
4959       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
4960               Node->getValueType(0),
4961               {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H,
4962                AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D}))
4963         SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
4964       return;
4965     case Intrinsic::aarch64_sve_urshl_x2:
4966       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
4967               Node->getValueType(0),
4968               {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H,
4969                AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D}))
4970         SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
4971       return;
4972     case Intrinsic::aarch64_sve_urshl_x4:
4973       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
4974               Node->getValueType(0),
4975               {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H,
4976                AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D}))
4977         SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
4978       return;
4979     case Intrinsic::aarch64_sve_sqdmulh_single_vgx2:
4980       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
4981               Node->getValueType(0),
4982               {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H,
4983                AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D}))
4984         SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
4985       return;
4986     case Intrinsic::aarch64_sve_sqdmulh_single_vgx4:
4987       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
4988               Node->getValueType(0),
4989               {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H,
4990                AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D}))
4991         SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
4992       return;
4993     case Intrinsic::aarch64_sve_sqdmulh_vgx2:
4994       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
4995               Node->getValueType(0),
4996               {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H,
4997                AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D}))
4998         SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
4999       return;
5000     case Intrinsic::aarch64_sve_sqdmulh_vgx4:
5001       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5002               Node->getValueType(0),
5003               {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H,
5004                AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
5005         SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5006       return;
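    // The WHILE* pair intrinsics produce two predicate results; the opcode is
    // chosen from the predicate element type and selection is shared through
    // SelectWhilePair.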
5007     case Intrinsic::aarch64_sve_whilege_x2:
5008       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5009               Node->getValueType(0),
5010               {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
5011                AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
5012         SelectWhilePair(Node, Op);
5013       return;
5014     case Intrinsic::aarch64_sve_whilegt_x2:
5015       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5016               Node->getValueType(0),
5017               {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
5018                AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
5019         SelectWhilePair(Node, Op);
5020       return;
5021     case Intrinsic::aarch64_sve_whilehi_x2:
5022       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5023               Node->getValueType(0),
5024               {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
5025                AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
5026         SelectWhilePair(Node, Op);
5027       return;
5028     case Intrinsic::aarch64_sve_whilehs_x2:
5029       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5030               Node->getValueType(0),
5031               {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
5032                AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
5033         SelectWhilePair(Node, Op);
5034       return;
5035     case Intrinsic::aarch64_sve_whilele_x2:
5036       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5037               Node->getValueType(0),
5038               {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
5039                AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
5040         SelectWhilePair(Node, Op);
5041       return;
5042     case Intrinsic::aarch64_sve_whilelo_x2:
5043       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5044               Node->getValueType(0),
5045               {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
5046                AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
5047         SelectWhilePair(Node, Op);
5048       return;
5049     case Intrinsic::aarch64_sve_whilels_x2:
5050       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5051               Node->getValueType(0),
5052               {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
5053                AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
5054         SelectWhilePair(Node, Op);
5055       return;
5056     case Intrinsic::aarch64_sve_whilelt_x2:
5057       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5058               Node->getValueType(0),
5059               {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
5060                AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
5061         SelectWhilePair(Node, Op);
5062       return;
5063     case Intrinsic::aarch64_sve_smax_single_x2:
5064       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5065               Node->getValueType(0),
5066               {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H,
5067                AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D}))
5068         SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5069       return;
5070     case Intrinsic::aarch64_sve_umax_single_x2:
5071       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5072               Node->getValueType(0),
5073               {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H,
5074                AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D}))
5075         SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5076       return;
5077     case Intrinsic::aarch64_sve_fmax_single_x2:
5078       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5079               Node->getValueType(0),
5080               {0, AArch64::FMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_S,
5081                AArch64::FMAX_VG2_2ZZ_D}))
5082         SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5083       return;
5084     case Intrinsic::aarch64_sve_smax_single_x4:
5085       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5086               Node->getValueType(0),
5087               {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H,
5088                AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D}))
5089         SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5090       return;
5091     case Intrinsic::aarch64_sve_umax_single_x4:
5092       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5093               Node->getValueType(0),
5094               {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H,
5095                AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D}))
5096         SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5097       return;
5098     case Intrinsic::aarch64_sve_fmax_single_x4:
5099       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5100               Node->getValueType(0),
5101               {0, AArch64::FMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_S,
5102                AArch64::FMAX_VG4_4ZZ_D}))
5103         SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5104       return;
5105     case Intrinsic::aarch64_sve_smin_single_x2:
5106       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5107               Node->getValueType(0),
5108               {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H,
5109                AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D}))
5110         SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5111       return;
5112     case Intrinsic::aarch64_sve_umin_single_x2:
5113       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5114               Node->getValueType(0),
5115               {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H,
5116                AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D}))
5117         SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5118       return;
5119     case Intrinsic::aarch64_sve_fmin_single_x2:
5120       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5121               Node->getValueType(0),
5122               {0, AArch64::FMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_S,
5123                AArch64::FMIN_VG2_2ZZ_D}))
5124         SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5125       return;
5126     case Intrinsic::aarch64_sve_smin_single_x4:
5127       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5128               Node->getValueType(0),
5129               {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H,
5130                AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D}))
5131         SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5132       return;
5133     case Intrinsic::aarch64_sve_umin_single_x4:
5134       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5135               Node->getValueType(0),
5136               {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H,
5137                AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D}))
5138         SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5139       return;
5140     case Intrinsic::aarch64_sve_fmin_single_x4:
5141       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5142               Node->getValueType(0),
5143               {0, AArch64::FMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_S,
5144                AArch64::FMIN_VG4_4ZZ_D}))
5145         SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5146       return;
5147     case Intrinsic::aarch64_sve_smax_x2:
5148       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5149               Node->getValueType(0),
5150               {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H,
5151                AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D}))
5152         SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5153       return;
5154     case Intrinsic::aarch64_sve_umax_x2:
5155       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5156               Node->getValueType(0),
5157               {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H,
5158                AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D}))
5159         SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5160       return;
5161     case Intrinsic::aarch64_sve_fmax_x2:
5162       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5163               Node->getValueType(0),
5164               {0, AArch64::FMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_S,
5165                AArch64::FMAX_VG2_2Z2Z_D}))
5166         SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5167       return;
5168     case Intrinsic::aarch64_sve_smax_x4:
5169       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5170               Node->getValueType(0),
5171               {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
5172                AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
5173         SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5174       return;
5175     case Intrinsic::aarch64_sve_umax_x4:
5176       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5177               Node->getValueType(0),
5178               {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
5179                AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
5180         SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5181       return;
5182     case Intrinsic::aarch64_sve_fmax_x4:
5183       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5184               Node->getValueType(0),
5185               {0, AArch64::FMAX_VG4_4Z4Z_H, AArch64::FMAX_VG4_4Z4Z_S,
5186                AArch64::FMAX_VG4_4Z4Z_D}))
5187         SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5188       return;
5189     case Intrinsic::aarch64_sve_smin_x2:
5190       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5191               Node->getValueType(0),
5192               {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
5193                AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
5194         SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5195       return;
5196     case Intrinsic::aarch64_sve_umin_x2:
5197       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5198               Node->getValueType(0),
5199               {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
5200                AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
5201         SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5202       return;
5203     case Intrinsic::aarch64_sve_fmin_x2:
5204       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5205               Node->getValueType(0),
5206               {0, AArch64::FMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_S,
5207                AArch64::FMIN_VG2_2Z2Z_D}))
5208         SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5209       return;
5210     case Intrinsic::aarch64_sve_smin_x4:
5211       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5212               Node->getValueType(0),
5213               {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
5214                AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
5215         SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5216       return;
5217     case Intrinsic::aarch64_sve_umin_x4:
5218       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5219               Node->getValueType(0),
5220               {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
5221                AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
5222         SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5223       return;
5224     case Intrinsic::aarch64_sve_fmin_x4:
5225       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5226               Node->getValueType(0),
5227               {0, AArch64::FMIN_VG4_4Z4Z_H, AArch64::FMIN_VG4_4Z4Z_S,
5228                AArch64::FMIN_VG4_4Z4Z_D}))
5229         SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5230       return;
5231     case Intrinsic::aarch64_sve_fmaxnm_single_x2:
5232       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5233               Node->getValueType(0),
5234               {0, AArch64::FMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_S,
5235                AArch64::FMAXNM_VG2_2ZZ_D}))
5236         SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5237       return;
5238     case Intrinsic::aarch64_sve_fmaxnm_single_x4:
5239       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5240               Node->getValueType(0),
5241               {0, AArch64::FMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_S,
5242                AArch64::FMAXNM_VG4_4ZZ_D}))
5243         SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5244       return;
5245     case Intrinsic::aarch64_sve_fminnm_single_x2:
5246       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5247               Node->getValueType(0),
5248               {0, AArch64::FMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_S,
5249                AArch64::FMINNM_VG2_2ZZ_D}))
5250         SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5251       return;
5252     case Intrinsic::aarch64_sve_fminnm_single_x4:
5253       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5254               Node->getValueType(0),
5255               {0, AArch64::FMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_S,
5256                AArch64::FMINNM_VG4_4ZZ_D}))
5257         SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5258       return;
5259     case Intrinsic::aarch64_sve_fmaxnm_x2:
5260       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5261               Node->getValueType(0),
5262               {0, AArch64::FMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_S,
5263                AArch64::FMAXNM_VG2_2Z2Z_D}))
5264         SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5265       return;
5266     case Intrinsic::aarch64_sve_fmaxnm_x4:
5267       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5268               Node->getValueType(0),
5269               {0, AArch64::FMAXNM_VG4_4Z4Z_H, AArch64::FMAXNM_VG4_4Z4Z_S,
5270                AArch64::FMAXNM_VG4_4Z4Z_D}))
5271         SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5272       return;
5273     case Intrinsic::aarch64_sve_fminnm_x2:
5274       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5275               Node->getValueType(0),
5276               {0, AArch64::FMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_S,
5277                AArch64::FMINNM_VG2_2Z2Z_D}))
5278         SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5279       return;
5280     case Intrinsic::aarch64_sve_fminnm_x4:
5281       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5282               Node->getValueType(0),
5283               {0, AArch64::FMINNM_VG4_4Z4Z_H, AArch64::FMINNM_VG4_4Z4Z_S,
5284                AArch64::FMINNM_VG4_4Z4Z_D}))
5285         SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5286       return;
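    // The multi-vector FP<->int conversions handled here have a single legal
    // element combination (S to S), so the opcode is selected directly.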
5287     case Intrinsic::aarch64_sve_fcvts_x2:
5288       SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
5289       return;
5290     case Intrinsic::aarch64_sve_scvtf_x2:
5291       SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS);
5292       return;
5293     case Intrinsic::aarch64_sve_fcvtu_x2:
5294       SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS);
5295       return;
5296     case Intrinsic::aarch64_sve_ucvtf_x2:
5297       SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS);
5298       return;
5299     case Intrinsic::aarch64_sve_fcvts_x4:
5300       SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS);
5301       return;
5302     case Intrinsic::aarch64_sve_scvtf_x4:
5303       SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS);
5304       return;
5305     case Intrinsic::aarch64_sve_fcvtu_x4:
5306       SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS);
5307       return;
5308     case Intrinsic::aarch64_sve_ucvtf_x4:
5309       SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
5310       return;
5311     case Intrinsic::aarch64_sve_sclamp_single_x2:
5312       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5313               Node->getValueType(0),
5314               {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H,
5315                AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D}))
5316         SelectClamp(Node, 2, Op);
5317       return;
5318     case Intrinsic::aarch64_sve_uclamp_single_x2:
5319       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5320               Node->getValueType(0),
5321               {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H,
5322                AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D}))
5323         SelectClamp(Node, 2, Op);
5324       return;
5325     case Intrinsic::aarch64_sve_fclamp_single_x2:
5326       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5327               Node->getValueType(0),
5328               {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S,
5329                AArch64::FCLAMP_VG2_2Z2Z_D}))
5330         SelectClamp(Node, 2, Op);
5331       return;
5332     case Intrinsic::aarch64_sve_sclamp_single_x4:
5333       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5334               Node->getValueType(0),
5335               {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H,
5336                AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D}))
5337         SelectClamp(Node, 4, Op);
5338       return;
5339     case Intrinsic::aarch64_sve_uclamp_single_x4:
5340       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5341               Node->getValueType(0),
5342               {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H,
5343                AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D}))
5344         SelectClamp(Node, 4, Op);
5345       return;
5346     case Intrinsic::aarch64_sve_fclamp_single_x4:
5347       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5348               Node->getValueType(0),
5349               {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S,
5350                AArch64::FCLAMP_VG4_4Z4Z_D}))
5351         SelectClamp(Node, 4, Op);
5352       return;
5353     case Intrinsic::aarch64_sve_add_single_x2:
5354       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5355               Node->getValueType(0),
5356               {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H,
5357                AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D}))
5358         SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5359       return;
5360     case Intrinsic::aarch64_sve_add_single_x4:
5361       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5362               Node->getValueType(0),
5363               {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H,
5364                AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D}))
5365         SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5366       return;
5367     case Intrinsic::aarch64_sve_zip_x2:
5368       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5369               Node->getValueType(0),
5370               {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H,
5371                AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D}))
5372         SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
5373       return;
5374     case Intrinsic::aarch64_sve_zipq_x2:
5375       SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
5376                                 AArch64::ZIP_VG2_2ZZZ_Q);
5377       return;
5378     case Intrinsic::aarch64_sve_zip_x4:
5379       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5380               Node->getValueType(0),
5381               {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H,
5382                AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D}))
5383         SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
5384       return;
5385     case Intrinsic::aarch64_sve_zipq_x4:
5386       SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
5387                                 AArch64::ZIP_VG4_4Z4Z_Q);
5388       return;
5389     case Intrinsic::aarch64_sve_uzp_x2:
5390       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5391               Node->getValueType(0),
5392               {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H,
5393                AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D}))
5394         SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
5395       return;
5396     case Intrinsic::aarch64_sve_uzpq_x2:
5397       SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
5398                                 AArch64::UZP_VG2_2ZZZ_Q);
5399       return;
5400     case Intrinsic::aarch64_sve_uzp_x4:
5401       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5402               Node->getValueType(0),
5403               {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H,
5404                AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D}))
5405         SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
5406       return;
5407     case Intrinsic::aarch64_sve_uzpq_x4:
5408       SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
5409                                 AArch64::UZP_VG4_4Z4Z_Q);
5410       return;
5411     case Intrinsic::aarch64_sve_sel_x2:
5412       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5413               Node->getValueType(0),
5414               {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H,
5415                AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D}))
5416         SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true);
5417       return;
5418     case Intrinsic::aarch64_sve_sel_x4:
5419       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5420               Node->getValueType(0),
5421               {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H,
5422                AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D}))
5423         SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true);
5424       return;
5425     case Intrinsic::aarch64_sve_frinta_x2:
5426       SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S);
5427       return;
5428     case Intrinsic::aarch64_sve_frinta_x4:
5429       SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S);
5430       return;
5431     case Intrinsic::aarch64_sve_frintm_x2:
5432       SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S);
5433       return;
5434     case Intrinsic::aarch64_sve_frintm_x4:
5435       SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S);
5436       return;
5437     case Intrinsic::aarch64_sve_frintn_x2:
5438       SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S);
5439       return;
5440     case Intrinsic::aarch64_sve_frintn_x4:
5441       SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S);
5442       return;
5443     case Intrinsic::aarch64_sve_frintp_x2:
5444       SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S);
5445       return;
5446     case Intrinsic::aarch64_sve_frintp_x4:
5447       SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S);
5448       return;
5449     case Intrinsic::aarch64_sve_sunpk_x2:
5450       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5451               Node->getValueType(0),
5452               {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S,
5453                AArch64::SUNPK_VG2_2ZZ_D}))
5454         SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
5455       return;
5456     case Intrinsic::aarch64_sve_uunpk_x2:
5457       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5458               Node->getValueType(0),
5459               {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S,
5460                AArch64::UUNPK_VG2_2ZZ_D}))
5461         SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
5462       return;
5463     case Intrinsic::aarch64_sve_sunpk_x4:
5464       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5465               Node->getValueType(0),
5466               {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S,
5467                AArch64::SUNPK_VG4_4Z2Z_D}))
5468         SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
5469       return;
5470     case Intrinsic::aarch64_sve_uunpk_x4:
5471       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5472               Node->getValueType(0),
5473               {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S,
5474                AArch64::UUNPK_VG4_4Z2Z_D}))
5475         SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
5476       return;
5477     case Intrinsic::aarch64_sve_pext_x2: {
5478       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5479               Node->getValueType(0),
5480               {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
5481                AArch64::PEXT_2PCI_D}))
5482         SelectPExtPair(Node, Op);
5483       return;
5484     }
5485     }
5486     break;
5487   }
5488   case ISD::INTRINSIC_VOID: {
5489     unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
5490     if (Node->getNumOperands() >= 3)
5491       VT = Node->getOperand(2)->getValueType(0);
5492     switch (IntNo) {
5493     default:
5494       break;
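    // NEON structured stores (st1x{2,3,4}, st{2,3,4} and st{2,3,4}lane):
    // dispatch on the type of the stored vectors to the matching
    // ST1/ST2/ST3/ST4 instruction. Note that the interleaving st2/st3/st4 of
    // v1i64/v1f64 fall back to the multi-register ST1 forms, as ST2/ST3/ST4
    // have no .1d arrangement.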
5495     case Intrinsic::aarch64_neon_st1x2: {
5496       if (VT == MVT::v8i8) {
5497         SelectStore(Node, 2, AArch64::ST1Twov8b);
5498         return;
5499       } else if (VT == MVT::v16i8) {
5500         SelectStore(Node, 2, AArch64::ST1Twov16b);
5501         return;
5502       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
5503                  VT == MVT::v4bf16) {
5504         SelectStore(Node, 2, AArch64::ST1Twov4h);
5505         return;
5506       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
5507                  VT == MVT::v8bf16) {
5508         SelectStore(Node, 2, AArch64::ST1Twov8h);
5509         return;
5510       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5511         SelectStore(Node, 2, AArch64::ST1Twov2s);
5512         return;
5513       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5514         SelectStore(Node, 2, AArch64::ST1Twov4s);
5515         return;
5516       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5517         SelectStore(Node, 2, AArch64::ST1Twov2d);
5518         return;
5519       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5520         SelectStore(Node, 2, AArch64::ST1Twov1d);
5521         return;
5522       }
5523       break;
5524     }
5525     case Intrinsic::aarch64_neon_st1x3: {
5526       if (VT == MVT::v8i8) {
5527         SelectStore(Node, 3, AArch64::ST1Threev8b);
5528         return;
5529       } else if (VT == MVT::v16i8) {
5530         SelectStore(Node, 3, AArch64::ST1Threev16b);
5531         return;
5532       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
5533                  VT == MVT::v4bf16) {
5534         SelectStore(Node, 3, AArch64::ST1Threev4h);
5535         return;
5536       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
5537                  VT == MVT::v8bf16) {
5538         SelectStore(Node, 3, AArch64::ST1Threev8h);
5539         return;
5540       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5541         SelectStore(Node, 3, AArch64::ST1Threev2s);
5542         return;
5543       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5544         SelectStore(Node, 3, AArch64::ST1Threev4s);
5545         return;
5546       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5547         SelectStore(Node, 3, AArch64::ST1Threev2d);
5548         return;
5549       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5550         SelectStore(Node, 3, AArch64::ST1Threev1d);
5551         return;
5552       }
5553       break;
5554     }
5555     case Intrinsic::aarch64_neon_st1x4: {
5556       if (VT == MVT::v8i8) {
5557         SelectStore(Node, 4, AArch64::ST1Fourv8b);
5558         return;
5559       } else if (VT == MVT::v16i8) {
5560         SelectStore(Node, 4, AArch64::ST1Fourv16b);
5561         return;
5562       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
5563                  VT == MVT::v4bf16) {
5564         SelectStore(Node, 4, AArch64::ST1Fourv4h);
5565         return;
5566       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
5567                  VT == MVT::v8bf16) {
5568         SelectStore(Node, 4, AArch64::ST1Fourv8h);
5569         return;
5570       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5571         SelectStore(Node, 4, AArch64::ST1Fourv2s);
5572         return;
5573       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5574         SelectStore(Node, 4, AArch64::ST1Fourv4s);
5575         return;
5576       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5577         SelectStore(Node, 4, AArch64::ST1Fourv2d);
5578         return;
5579       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5580         SelectStore(Node, 4, AArch64::ST1Fourv1d);
5581         return;
5582       }
5583       break;
5584     }
5585     case Intrinsic::aarch64_neon_st2: {
5586       if (VT == MVT::v8i8) {
5587         SelectStore(Node, 2, AArch64::ST2Twov8b);
5588         return;
5589       } else if (VT == MVT::v16i8) {
5590         SelectStore(Node, 2, AArch64::ST2Twov16b);
5591         return;
5592       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
5593                  VT == MVT::v4bf16) {
5594         SelectStore(Node, 2, AArch64::ST2Twov4h);
5595         return;
5596       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
5597                  VT == MVT::v8bf16) {
5598         SelectStore(Node, 2, AArch64::ST2Twov8h);
5599         return;
5600       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5601         SelectStore(Node, 2, AArch64::ST2Twov2s);
5602         return;
5603       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5604         SelectStore(Node, 2, AArch64::ST2Twov4s);
5605         return;
5606       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5607         SelectStore(Node, 2, AArch64::ST2Twov2d);
5608         return;
5609       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5610         SelectStore(Node, 2, AArch64::ST1Twov1d);
5611         return;
5612       }
5613       break;
5614     }
5615     case Intrinsic::aarch64_neon_st3: {
5616       if (VT == MVT::v8i8) {
5617         SelectStore(Node, 3, AArch64::ST3Threev8b);
5618         return;
5619       } else if (VT == MVT::v16i8) {
5620         SelectStore(Node, 3, AArch64::ST3Threev16b);
5621         return;
5622       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
5623                  VT == MVT::v4bf16) {
5624         SelectStore(Node, 3, AArch64::ST3Threev4h);
5625         return;
5626       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
5627                  VT == MVT::v8bf16) {
5628         SelectStore(Node, 3, AArch64::ST3Threev8h);
5629         return;
5630       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5631         SelectStore(Node, 3, AArch64::ST3Threev2s);
5632         return;
5633       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5634         SelectStore(Node, 3, AArch64::ST3Threev4s);
5635         return;
5636       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5637         SelectStore(Node, 3, AArch64::ST3Threev2d);
5638         return;
5639       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5640         SelectStore(Node, 3, AArch64::ST1Threev1d);
5641         return;
5642       }
5643       break;
5644     }
5645     case Intrinsic::aarch64_neon_st4: {
5646       if (VT == MVT::v8i8) {
5647         SelectStore(Node, 4, AArch64::ST4Fourv8b);
5648         return;
5649       } else if (VT == MVT::v16i8) {
5650         SelectStore(Node, 4, AArch64::ST4Fourv16b);
5651         return;
5652       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
5653                  VT == MVT::v4bf16) {
5654         SelectStore(Node, 4, AArch64::ST4Fourv4h);
5655         return;
5656       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
5657                  VT == MVT::v8bf16) {
5658         SelectStore(Node, 4, AArch64::ST4Fourv8h);
5659         return;
5660       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5661         SelectStore(Node, 4, AArch64::ST4Fourv2s);
5662         return;
5663       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5664         SelectStore(Node, 4, AArch64::ST4Fourv4s);
5665         return;
5666       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5667         SelectStore(Node, 4, AArch64::ST4Fourv2d);
5668         return;
5669       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5670         SelectStore(Node, 4, AArch64::ST1Fourv1d);
5671         return;
5672       }
5673       break;
5674     }
5675     case Intrinsic::aarch64_neon_st2lane: {
5676       if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5677         SelectStoreLane(Node, 2, AArch64::ST2i8);
5678         return;
5679       } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5680                  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5681         SelectStoreLane(Node, 2, AArch64::ST2i16);
5682         return;
5683       } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5684                  VT == MVT::v2f32) {
5685         SelectStoreLane(Node, 2, AArch64::ST2i32);
5686         return;
5687       } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5688                  VT == MVT::v1f64) {
5689         SelectStoreLane(Node, 2, AArch64::ST2i64);
5690         return;
5691       }
5692       break;
5693     }
5694     case Intrinsic::aarch64_neon_st3lane: {
5695       if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5696         SelectStoreLane(Node, 3, AArch64::ST3i8);
5697         return;
5698       } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5699                  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5700         SelectStoreLane(Node, 3, AArch64::ST3i16);
5701         return;
5702       } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5703                  VT == MVT::v2f32) {
5704         SelectStoreLane(Node, 3, AArch64::ST3i32);
5705         return;
5706       } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5707                  VT == MVT::v1f64) {
5708         SelectStoreLane(Node, 3, AArch64::ST3i64);
5709         return;
5710       }
5711       break;
5712     }
5713     case Intrinsic::aarch64_neon_st4lane: {
5714       if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5715         SelectStoreLane(Node, 4, AArch64::ST4i8);
5716         return;
5717       } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5718                  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5719         SelectStoreLane(Node, 4, AArch64::ST4i16);
5720         return;
5721       } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5722                  VT == MVT::v2f32) {
5723         SelectStoreLane(Node, 4, AArch64::ST4i32);
5724         return;
5725       } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5726                  VT == MVT::v1f64) {
5727         SelectStoreLane(Node, 4, AArch64::ST4i64);
5728         return;
5729       }
5730       break;
5731     }
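    // SVE structured stores: SelectPredicatedStore takes the number of
    // vectors, the log2 element size in bytes (0..3) and both the
    // scalar+scalar and scalar+immediate forms of the ST2/ST3/ST4 opcode.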
5732     case Intrinsic::aarch64_sve_st2: {
5733       if (VT == MVT::nxv16i8) {
5734         SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
5735         return;
5736       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5737                  VT == MVT::nxv8bf16) {
5738         SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
5739         return;
5740       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5741         SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
5742         return;
5743       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5744         SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
5745         return;
5746       }
5747       break;
5748     }
5749     case Intrinsic::aarch64_sve_st3: {
5750       if (VT == MVT::nxv16i8) {
5751         SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
5752         return;
5753       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5754                  VT == MVT::nxv8bf16) {
5755         SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
5756         return;
5757       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5758         SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
5759         return;
5760       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5761         SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
5762         return;
5763       }
5764       break;
5765     }
5766     case Intrinsic::aarch64_sve_st4: {
5767       if (VT == MVT::nxv16i8) {
5768         SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
5769         return;
5770       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5771                  VT == MVT::nxv8bf16) {
5772         SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
5773         return;
5774       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5775         SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
5776         return;
5777       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5778         SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
5779         return;
5780       }
5781       break;
5782     }
5783     }
5784     break;
5785   }
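  // Post-incremented structured loads. Each case selects the _POST variant of
  // the load together with the sub-register index (dsub0 for 64-bit, qsub0
  // for 128-bit register groups) used to split the tuple result back into
  // individual vectors. As with the stores above, v1i64/v1f64 LD2/LD3/LD4
  // fall back to the multi-register LD1 forms.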
5786   case AArch64ISD::LD2post: {
5787     if (VT == MVT::v8i8) {
5788       SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
5789       return;
5790     } else if (VT == MVT::v16i8) {
5791       SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
5792       return;
5793     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5794       SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
5795       return;
5796     } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
5797       SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
5798       return;
5799     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5800       SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
5801       return;
5802     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5803       SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
5804       return;
5805     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5806       SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
5807       return;
5808     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5809       SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
5810       return;
5811     }
5812     break;
5813   }
5814   case AArch64ISD::LD3post: {
5815     if (VT == MVT::v8i8) {
5816       SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
5817       return;
5818     } else if (VT == MVT::v16i8) {
5819       SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
5820       return;
5821     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5822       SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
5823       return;
5824     } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
5825       SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
5826       return;
5827     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5828       SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
5829       return;
5830     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5831       SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
5832       return;
5833     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5834       SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
5835       return;
5836     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5837       SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
5838       return;
5839     }
5840     break;
5841   }
5842   case AArch64ISD::LD4post: {
5843     if (VT == MVT::v8i8) {
5844       SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
5845       return;
5846     } else if (VT == MVT::v16i8) {
5847       SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
5848       return;
5849     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5850       SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
5851       return;
5852     } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
5853       SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
5854       return;
5855     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5856       SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
5857       return;
5858     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5859       SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
5860       return;
5861     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5862       SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
5863       return;
5864     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5865       SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
5866       return;
5867     }
5868     break;
5869   }
5870   case AArch64ISD::LD1x2post: {
5871     if (VT == MVT::v8i8) {
5872       SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
5873       return;
5874     } else if (VT == MVT::v16i8) {
5875       SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
5876       return;
5877     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5878       SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
5879       return;
5880     } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
5881       SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
5882       return;
5883     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5884       SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
5885       return;
5886     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5887       SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
5888       return;
5889     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5890       SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
5891       return;
5892     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5893       SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
5894       return;
5895     }
5896     break;
5897   }
5898   case AArch64ISD::LD1x3post: {
5899     if (VT == MVT::v8i8) {
5900       SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
5901       return;
5902     } else if (VT == MVT::v16i8) {
5903       SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
5904       return;
5905     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5906       SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
5907       return;
5908     } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
5909       SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
5910       return;
5911     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5912       SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
5913       return;
5914     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5915       SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
5916       return;
5917     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5918       SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
5919       return;
5920     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5921       SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
5922       return;
5923     }
5924     break;
5925   }
5926   case AArch64ISD::LD1x4post: {
5927     if (VT == MVT::v8i8) {
5928       SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
5929       return;
5930     } else if (VT == MVT::v16i8) {
5931       SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
5932       return;
5933     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5934       SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
5935       return;
5936     } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
5937       SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
5938       return;
5939     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5940       SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
5941       return;
5942     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5943       SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
5944       return;
5945     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5946       SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
5947       return;
5948     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5949       SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
5950       return;
5951     }
5952     break;
5953   }
5954   case AArch64ISD::LD1DUPpost: {
5955     if (VT == MVT::v8i8) {
5956       SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
5957       return;
5958     } else if (VT == MVT::v16i8) {
5959       SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
5960       return;
5961     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5962       SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
5963       return;
5964     } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
5965       SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
5966       return;
5967     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5968       SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
5969       return;
5970     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5971       SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
5972       return;
5973     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5974       SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
5975       return;
5976     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5977       SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
5978       return;
5979     }
5980     break;
5981   }
5982   case AArch64ISD::LD2DUPpost: {
5983     if (VT == MVT::v8i8) {
5984       SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
5985       return;
5986     } else if (VT == MVT::v16i8) {
5987       SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
5988       return;
5989     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5990       SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
5991       return;
5992     } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
5993       SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
5994       return;
5995     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5996       SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
5997       return;
5998     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5999       SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
6000       return;
6001     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6002       SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
6003       return;
6004     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6005       SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
6006       return;
6007     }
6008     break;
6009   }
6010   case AArch64ISD::LD3DUPpost: {
6011     if (VT == MVT::v8i8) {
6012       SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
6013       return;
6014     } else if (VT == MVT::v16i8) {
6015       SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
6016       return;
6017     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6018       SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
6019       return;
6020     } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
6021       SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
6022       return;
6023     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6024       SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
6025       return;
6026     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6027       SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
6028       return;
6029     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6030       SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
6031       return;
6032     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6033       SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
6034       return;
6035     }
6036     break;
6037   }
6038   case AArch64ISD::LD4DUPpost: {
6039     if (VT == MVT::v8i8) {
6040       SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
6041       return;
6042     } else if (VT == MVT::v16i8) {
6043       SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
6044       return;
6045     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6046       SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
6047       return;
6048     } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
6049       SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
6050       return;
6051     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6052       SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
6053       return;
6054     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6055       SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
6056       return;
6057     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6058       SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
6059       return;
6060     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6061       SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
6062       return;
6063     }
6064     break;
6065   }
6066   case AArch64ISD::LD1LANEpost: {
6067     if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6068       SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
6069       return;
6070     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6071                VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6072       SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
6073       return;
6074     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6075                VT == MVT::v2f32) {
6076       SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
6077       return;
6078     } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6079                VT == MVT::v1f64) {
6080       SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
6081       return;
6082     }
6083     break;
6084   }
6085   case AArch64ISD::LD2LANEpost: {
6086     if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6087       SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
6088       return;
6089     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6090                VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6091       SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
6092       return;
6093     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6094                VT == MVT::v2f32) {
6095       SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
6096       return;
6097     } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6098                VT == MVT::v1f64) {
6099       SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
6100       return;
6101     }
6102     break;
6103   }
6104   case AArch64ISD::LD3LANEpost: {
6105     if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6106       SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
6107       return;
6108     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6109                VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6110       SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
6111       return;
6112     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6113                VT == MVT::v2f32) {
6114       SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
6115       return;
6116     } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6117                VT == MVT::v1f64) {
6118       SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
6119       return;
6120     }
6121     break;
6122   }
6123   case AArch64ISD::LD4LANEpost: {
6124     if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6125       SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
6126       return;
6127     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6128                VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6129       SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
6130       return;
6131     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6132                VT == MVT::v2f32) {
6133       SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
6134       return;
6135     } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6136                VT == MVT::v1f64) {
6137       SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
6138       return;
6139     }
6140     break;
6141   }
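  // Post-incremented structured stores. Stores have no vector result, so the
  // element type is taken from operand 1, i.e. the first stored vector.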
6142   case AArch64ISD::ST2post: {
6143     VT = Node->getOperand(1).getValueType();
6144     if (VT == MVT::v8i8) {
6145       SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
6146       return;
6147     } else if (VT == MVT::v16i8) {
6148       SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
6149       return;
6150     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6151       SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
6152       return;
6153     } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6154       SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
6155       return;
6156     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6157       SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
6158       return;
6159     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6160       SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
6161       return;
6162     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6163       SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
6164       return;
6165     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6166       SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
6167       return;
6168     }
6169     break;
6170   }
6171   case AArch64ISD::ST3post: {
6172     VT = Node->getOperand(1).getValueType();
6173     if (VT == MVT::v8i8) {
6174       SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
6175       return;
6176     } else if (VT == MVT::v16i8) {
6177       SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
6178       return;
6179     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6180       SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
6181       return;
6182     } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6183       SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
6184       return;
6185     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6186       SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
6187       return;
6188     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6189       SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
6190       return;
6191     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6192       SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
6193       return;
6194     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6195       SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
6196       return;
6197     }
6198     break;
6199   }
6200   case AArch64ISD::ST4post: {
6201     VT = Node->getOperand(1).getValueType();
6202     if (VT == MVT::v8i8) {
6203       SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
6204       return;
6205     } else if (VT == MVT::v16i8) {
6206       SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
6207       return;
6208     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6209       SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
6210       return;
6211     } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6212       SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
6213       return;
6214     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6215       SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
6216       return;
6217     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6218       SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
6219       return;
6220     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6221       SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
6222       return;
6223     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6224       SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
6225       return;
6226     }
6227     break;
6228   }
6229   case AArch64ISD::ST1x2post: {
6230     VT = Node->getOperand(1).getValueType();
6231     if (VT == MVT::v8i8) {
6232       SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
6233       return;
6234     } else if (VT == MVT::v16i8) {
6235       SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
6236       return;
6237     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6238       SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
6239       return;
6240     } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6241       SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
6242       return;
6243     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6244       SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
6245       return;
6246     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6247       SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
6248       return;
6249     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6250       SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
6251       return;
6252     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6253       SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
6254       return;
6255     }
6256     break;
6257   }
6258   case AArch64ISD::ST1x3post: {
6259     VT = Node->getOperand(1).getValueType();
6260     if (VT == MVT::v8i8) {
6261       SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
6262       return;
6263     } else if (VT == MVT::v16i8) {
6264       SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
6265       return;
6266     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6267       SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
6268       return;
6269     } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6270       SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
6271       return;
6272     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6273       SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
6274       return;
6275     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6276       SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
6277       return;
6278     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6279       SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
6280       return;
6281     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6282       SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
6283       return;
6284     }
6285     break;
6286   }
6287   case AArch64ISD::ST1x4post: {
6288     VT = Node->getOperand(1).getValueType();
6289     if (VT == MVT::v8i8) {
6290       SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
6291       return;
6292     } else if (VT == MVT::v16i8) {
6293       SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
6294       return;
6295     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6296       SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
6297       return;
6298     } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6299       SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
6300       return;
6301     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6302       SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
6303       return;
6304     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6305       SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
6306       return;
6307     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6308       SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
6309       return;
6310     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6311       SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
6312       return;
6313     }
6314     break;
6315   }
6316   case AArch64ISD::ST2LANEpost: {
6317     VT = Node->getOperand(1).getValueType();
6318     if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6319       SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
6320       return;
6321     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6322                VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6323       SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
6324       return;
6325     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6326                VT == MVT::v2f32) {
6327       SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
6328       return;
6329     } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6330                VT == MVT::v1f64) {
6331       SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
6332       return;
6333     }
6334     break;
6335   }
6336   case AArch64ISD::ST3LANEpost: {
6337     VT = Node->getOperand(1).getValueType();
6338     if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6339       SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
6340       return;
6341     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6342                VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6343       SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
6344       return;
6345     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6346                VT == MVT::v2f32) {
6347       SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
6348       return;
6349     } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6350                VT == MVT::v1f64) {
6351       SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
6352       return;
6353     }
6354     break;
6355   }
6356   case AArch64ISD::ST4LANEpost: {
6357     VT = Node->getOperand(1).getValueType();
6358     if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6359       SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
6360       return;
6361     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6362                VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6363       SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
6364       return;
6365     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6366                VT == MVT::v2f32) {
6367       SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
6368       return;
6369     } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6370                VT == MVT::v1f64) {
6371       SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
6372       return;
6373     }
6374     break;
6375   }
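  // SVE structured loads that zero inactive lanes. As for the SVE stores
  // above, SelectPredicatedLoad takes the number of vectors, the log2 element
  // size in bytes and both the scalar+immediate and scalar+scalar opcodes.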
6376   case AArch64ISD::SVE_LD2_MERGE_ZERO: {
6377     if (VT == MVT::nxv16i8) {
6378       SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B);
6379       return;
6380     } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6381                VT == MVT::nxv8bf16) {
6382       SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H);
6383       return;
6384     } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6385       SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W);
6386       return;
6387     } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6388       SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D);
6389       return;
6390     }
6391     break;
6392   }
6393   case AArch64ISD::SVE_LD3_MERGE_ZERO: {
6394     if (VT == MVT::nxv16i8) {
6395       SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B);
6396       return;
6397     } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6398                VT == MVT::nxv8bf16) {
6399       SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H);
6400       return;
6401     } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6402       SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W);
6403       return;
6404     } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6405       SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D);
6406       return;
6407     }
6408     break;
6409   }
6410   case AArch64ISD::SVE_LD4_MERGE_ZERO: {
6411     if (VT == MVT::nxv16i8) {
6412       SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B);
6413       return;
6414     } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6415                VT == MVT::nxv8bf16) {
6416       SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H);
6417       return;
6418     } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6419       SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W);
6420       return;
6421     } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6422       SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D);
6423       return;
6424     }
6425     break;
6426   }
6427   }
6428 
6429   // Select the default instruction
6430   SelectCode(Node);
6431 }
6432 
6433 /// createAArch64ISelDag - This pass converts a legalized DAG into an
6434 /// AArch64-specific DAG, ready for instruction scheduling.
6435 FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
6436                                          CodeGenOpt::Level OptLevel) {
6437   return new AArch64DAGToDAGISel(TM, OptLevel);
6438 }
6439 
6440 /// When \p PredVT is a scalable vector predicate in the form
6441 /// MVT::nx<M>xi1, it builds the corresponding scalable vector of
6442 /// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
6443 /// structured vectors (NumVec > 1), the output data type is
6444 /// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
6445 /// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
6446 /// EVT.
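/// For example, MVT::nxv4i1 maps to MVT::nxv4i32 (4 x 32 = 128), and with
/// NumVec == 2 it maps to MVT::nxv8i32.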
6447 static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
6448                                                 unsigned NumVec) {
6449   assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
6450   if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
6451     return EVT();
6452 
6453   if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
6454       PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
6455     return EVT();
6456 
6457   ElementCount EC = PredVT.getVectorElementCount();
6458   EVT ScalarVT =
6459       EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
6460   EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);
6461 
6462   return MemVT;
6463 }
6464 
6465 /// Return the EVT of the data associated with a memory operation in \p
6466 /// Root. If such an EVT cannot be retrieved, it returns an invalid EVT.
6467 static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
6468   if (isa<MemSDNode>(Root))
6469     return cast<MemSDNode>(Root)->getMemoryVT();
6470 
6471   if (isa<MemIntrinsicSDNode>(Root))
6472     return cast<MemIntrinsicSDNode>(Root)->getMemoryVT();
6473 
6474   const unsigned Opcode = Root->getOpcode();
6475   // For custom ISD nodes, we have to look at them individually to extract the
6476   // type of the data moved to/from memory.
6477   switch (Opcode) {
6478   case AArch64ISD::LD1_MERGE_ZERO:
6479   case AArch64ISD::LD1S_MERGE_ZERO:
6480   case AArch64ISD::LDNF1_MERGE_ZERO:
6481   case AArch64ISD::LDNF1S_MERGE_ZERO:
6482     return cast<VTSDNode>(Root->getOperand(3))->getVT();
6483   case AArch64ISD::ST1_PRED:
6484     return cast<VTSDNode>(Root->getOperand(4))->getVT();
6485   case AArch64ISD::SVE_LD2_MERGE_ZERO:
6486     return getPackedVectorTypeFromPredicateType(
6487         Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/2);
6488   case AArch64ISD::SVE_LD3_MERGE_ZERO:
6489     return getPackedVectorTypeFromPredicateType(
6490         Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/3);
6491   case AArch64ISD::SVE_LD4_MERGE_ZERO:
6492     return getPackedVectorTypeFromPredicateType(
6493         Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/4);
6494   default:
6495     break;
6496   }
6497 
6498   if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
6499     return EVT();
6500 
6501   switch (cast<ConstantSDNode>(Root->getOperand(1))->getZExtValue()) {
6502   default:
6503     return EVT();
6504   case Intrinsic::aarch64_sme_ldr:
6505   case Intrinsic::aarch64_sme_str:
6506     return MVT::nxv16i8;
6507   case Intrinsic::aarch64_sve_prf:
6508     // We are using an SVE prefetch intrinsic. Type must be inferred from the
6509     // width of the predicate.
6510     return getPackedVectorTypeFromPredicateType(
6511         Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
6512   case Intrinsic::aarch64_sve_ld2_sret:
6513     return getPackedVectorTypeFromPredicateType(
6514         Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
6515   case Intrinsic::aarch64_sve_ld3_sret:
6516     return getPackedVectorTypeFromPredicateType(
6517         Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
6518   case Intrinsic::aarch64_sve_ld4_sret:
6519     return getPackedVectorTypeFromPredicateType(
6520         Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
6521   }
6522 }
6523 
6524 /// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
6525 /// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max,
6526 /// where Root is the memory access using N for its address.
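/// For example, with MemVT == nxv4i32 an address of the form
/// (add Base, (vscale 32)) selects as Base plus the VL-scaled immediate #2,
/// since 32 is twice the 16-byte minimum size of nxv4i32, provided 2 lies
/// within [Min, Max].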
6527 template <int64_t Min, int64_t Max>
6528 bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
6529                                                    SDValue &Base,
6530                                                    SDValue &OffImm) {
6531   const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
6532   const DataLayout &DL = CurDAG->getDataLayout();
6533   const MachineFrameInfo &MFI = MF->getFrameInfo();
6534 
6535   if (N.getOpcode() == ISD::FrameIndex) {
6536     int FI = cast<FrameIndexSDNode>(N)->getIndex();
6537     // We can only encode VL scaled offsets, so only fold in frame indexes
6538     // referencing SVE objects.
6539     if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
6540       Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
6541       OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
6542       return true;
6543     }
6544 
6545     return false;
6546   }
6547 
6548   if (MemVT == EVT())
6549     return false;
6550 
6551   if (N.getOpcode() != ISD::ADD)
6552     return false;
6553 
6554   SDValue VScale = N.getOperand(1);
6555   if (VScale.getOpcode() != ISD::VSCALE)
6556     return false;
6557 
6558   TypeSize TS = MemVT.getSizeInBits();
6559   int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
6560   int64_t MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
6561 
6562   if ((MulImm % MemWidthBytes) != 0)
6563     return false;
6564 
6565   int64_t Offset = MulImm / MemWidthBytes;
6566   if (Offset < Min || Offset > Max)
6567     return false;
6568 
6569   Base = N.getOperand(0);
6570   if (Base.getOpcode() == ISD::FrameIndex) {
6571     int FI = cast<FrameIndexSDNode>(Base)->getIndex();
6572     // We can only encode VL scaled offsets, so only fold in frame indexes
6573     // referencing SVE objects.
6574     if (MFI.getStackID(FI) == TargetStackID::ScalableVector)
6575       Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
6576   }
6577 
6578   OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
6579   return true;
6580 }
6581 
6582 /// Select register plus register addressing mode for SVE, with scaled
6583 /// offset.
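/// Scale is log2 of the element size in bytes; e.g. Scale == 2 corresponds to
/// the [<Xn>, <Xm>, LSL #2] form used by 32-bit element accesses such as LD1W.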
6584 bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
6585                                                   SDValue &Base,
6586                                                   SDValue &Offset) {
6587   if (N.getOpcode() != ISD::ADD)
6588     return false;
6589 
6590   // Process an ADD node.
6591   const SDValue LHS = N.getOperand(0);
6592   const SDValue RHS = N.getOperand(1);
6593 
6594   // 8-bit data does not come with an SHL node (no scaling is needed), so
6595   // it is handled separately.
6596   if (Scale == 0) {
6597     Base = LHS;
6598     Offset = RHS;
6599     return true;
6600   }
6601 
6602   if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
6603     int64_t ImmOff = C->getSExtValue();
6604     unsigned Size = 1 << Scale;
6605 
6606     // To use the reg+reg addressing mode, the immediate must be a multiple of
6607     // the vector element's byte size.
6608     if (ImmOff % Size)
6609       return false;
6610 
6611     SDLoc DL(N);
6612     Base = LHS;
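    // Materialize the scaled offset into a register so it can be used as the
    // index operand of the reg+reg form.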
6613     Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
6614     SDValue Ops[] = {Offset};
6615     SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
6616     Offset = SDValue(MI, 0);
6617     return true;
6618   }
6619 
6620   // Check if the RHS is a shift node with a constant.
6621   if (RHS.getOpcode() != ISD::SHL)
6622     return false;
6623 
6624   const SDValue ShiftRHS = RHS.getOperand(1);
6625   if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
6626     if (C->getZExtValue() == Scale) {
6627       Base = LHS;
6628       Offset = RHS.getOperand(0);
6629       return true;
6630     }
6631 
6632   return false;
6633 }
6634 
6635 bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
6636   const AArch64TargetLowering *TLI =
6637       static_cast<const AArch64TargetLowering *>(getTargetLowering());
6638 
6639   return TLI->isAllActivePredicate(*CurDAG, N);
6640 }
6641 
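/// Returns true for any operand of SVE predicate type (a scalable vector of
/// i1 elements); unlike SelectAllActivePredicate, no check is made on the
/// predicate's value.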
6642 bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
6643   EVT VT = N.getValueType();
6644   return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
6645 }
6646 
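/// Select a base register plus scaled immediate offset for an SME tile slice:
/// the immediate must be a positive multiple of \p Scale no larger than
/// \p MaxSize and is encoded as ImmOff / Scale; anything else falls back to
/// 'reg + 0'.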
6647 bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
6648                                              SDValue &Base, SDValue &Offset,
6649                                              unsigned Scale) {
6650   // Try to untangle an ADD node into a 'reg + offset'
6651   if (N.getOpcode() == ISD::ADD)
6652     if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
6653       int64_t ImmOff = C->getSExtValue();
6654       if (ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0)) {
6655         Base = N.getOperand(0);
6656         Offset = CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
6657         return true;
6658       }
6659     }
6660 
6661   // By default, just match reg + 0.
6662   Base = N;
6663   Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
6664   return true;
6665 }
6666