1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines an instruction selector for the ARM target.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "ARM.h"
14 #include "ARMBaseInstrInfo.h"
15 #include "ARMTargetMachine.h"
16 #include "MCTargetDesc/ARMAddressingModes.h"
17 #include "Utils/ARMBaseInfo.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/CodeGen/MachineFrameInfo.h"
20 #include "llvm/CodeGen/MachineFunction.h"
21 #include "llvm/CodeGen/MachineInstrBuilder.h"
22 #include "llvm/CodeGen/MachineRegisterInfo.h"
23 #include "llvm/CodeGen/SelectionDAG.h"
24 #include "llvm/CodeGen/SelectionDAGISel.h"
25 #include "llvm/CodeGen/TargetLowering.h"
26 #include "llvm/IR/CallingConv.h"
27 #include "llvm/IR/Constants.h"
28 #include "llvm/IR/DerivedTypes.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/Intrinsics.h"
31 #include "llvm/IR/IntrinsicsARM.h"
32 #include "llvm/IR/LLVMContext.h"
33 #include "llvm/Support/CommandLine.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/ErrorHandling.h"
36 #include "llvm/Target/TargetOptions.h"
37 
38 using namespace llvm;
39 
40 #define DEBUG_TYPE "arm-isel"
41 
// Debugging knob: when set, the shifter-operand selectors below
// (SelectImmShifterOperand / SelectRegShifterOperand) refuse to fold shifts
// into ARM shifter operands.
static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
  cl::desc("Disable isel of shifter-op"),
  cl::init(false));
46 
47 //===--------------------------------------------------------------------===//
48 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
49 /// instructions for SelectionDAG operations.
50 ///
namespace {

class ARMDAGToDAGISel : public SelectionDAGISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;

public:
  // Note: Subtarget is intentionally not initialized here; it is (re)set at
  // the top of every runOnMachineFunction call below.
  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Reset the subtarget each time through.
    Subtarget = &MF.getSubtarget<ARMSubtarget>();
    SelectionDAGISel::runOnMachineFunction(MF);
    return true;
  }

  StringRef getPassName() const override { return "ARM Instruction Selection"; }

  void PreprocessISelDAG() override;

  /// getI32Imm - Return a target constant of type i32 with the specified
  /// value.
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  /// Main selection entry point, dispatching on the node's opcode.
  void Select(SDNode *N) override;

  bool hasNoVMLxHazardUse(SDNode *N) const;
  bool isShifterOpProfitable(const SDValue &Shift,
                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  bool SelectRegShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C,
                               bool CheckProfitability = true);
  bool SelectImmShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, bool CheckProfitability = true);
  // Variant used by patterns that must always fold the shift.
  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B, SDValue &C) {
    // Don't apply the profitability check
    return SelectRegShifterOperand(N, A, B, C, false);
  }
  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B) {
    // Don't apply the profitability check
    return SelectImmShifterOperand(N, A, B, false);
  }

  bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);

  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

  // Lower a constant condition-code operand into a (predicate, CPSR) pair for
  // conditional-move selection. N must be a ConstantSDNode.
  bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
    const ConstantSDNode *CN = cast<ConstantSDNode>(N);
    Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
    Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
    return true;
  }

  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
  bool SelectAddrMode3(SDValue N, SDValue &Base,
                       SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
  bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);

  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);

  // Thumb Addressing Modes:
  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                SDValue &OffImm);
  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);

  // Thumb 2 Addressing Modes:
  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                            SDValue &OffImm);
  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                 SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
                                  unsigned Shift);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                             SDValue &OffReg, SDValue &ShImm);
  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);

  template<int Min, int Max>
  bool SelectImmediateInRange(SDValue N, SDValue &OffImm);

  // Immediate-encodability predicates used by the generated matcher below.
  inline bool is_so_imm(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Imm) != -1;
  }

  inline bool is_so_imm_not(unsigned Imm) const {
    return ARM_AM::getSOImmVal(~Imm) != -1;
  }

  inline bool is_t2_so_imm(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  }

  inline bool is_t2_so_imm_not(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(~Imm) != -1;
  }

  // Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"

private:
  void transferMemOperands(SDNode *Src, SDNode *Dst);

  /// Indexed (pre/post inc/dec) load matching code for ARM.
  bool tryARMIndexedLoad(SDNode *N);
  bool tryT1IndexedLoad(SDNode *N);
  bool tryT2IndexedLoad(SDNode *N);
  bool tryMVEIndexedLoad(SDNode *N);

  /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
  /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// loads of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVST - Select NEON store intrinsics.  NumVecs should
  /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// stores of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
  /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// load/store of D registers and Q registers.
  void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                       unsigned NumVecs, const uint16_t *DOpcodes,
                       const uint16_t *QOpcodes);

  /// Helper functions for setting up clusters of MVE predication operands.
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask);
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask, SDValue Inactive);

  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc);
  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy);

  /// SelectMVE_WB - Select MVE writeback load/store intrinsics.
  void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated);

  /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics.
  void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate,
                           bool HasSaturationOperand);

  /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics.
  void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
                         uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);

  /// Select long MVE vector reductions with two vector operands
  /// Stride is the number of vector element widths the instruction can operate
  /// on:
  /// 2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]
  /// 1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32]
  /// Stride is used when addressing the OpcodesS array which contains multiple
  /// opcodes for each element width.
  /// TySize is the index into the list of element types listed above
  void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
                             const uint16_t *OpcodesS, const uint16_t *OpcodesU,
                             size_t Stride, size_t TySize);

  /// Select a 64-bit MVE vector reduction with two vector operands
  /// arm_mve_vmlldava_[predicated]
  void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                         const uint16_t *OpcodesU);
  /// Select a 72-bit MVE vector rounding reduction with two vector operands
  /// int_arm_mve_vrmlldavha[_predicated]
  void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                           const uint16_t *OpcodesU);

  /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs
  /// should be 2 or 4. The opcode array specifies the instructions
  /// used for 8, 16 and 32-bit lane sizes respectively, and each
  /// pointer points to a set of NumVecs sub-opcodes used for the
  /// different stages (e.g. VLD20 versus VLD21) of each load family.
  void SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                     const uint16_t *const *Opcodes);

  /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
  /// should be 1, 2, 3 or 4.  The opcode array specifies the instructions used
  /// for loading D registers.
  void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
                    unsigned NumVecs, const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0 = nullptr,
                    const uint16_t *QOpcodes1 = nullptr);

  /// Try to select SBFX/UBFX instructions for ARM.
  bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);

  // Select special operations if node forms integer ABS pattern
  bool tryABSOp(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool tryInlineAsm(SDNode *N);

  void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);

  void SelectCMP_SWAP(SDNode *N);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  // Form pairs of consecutive R, S, D, or Q registers.
  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);

  // Form sequences of 4 consecutive S, D, or Q registers.
  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);

  // Get the alignment operand for a NEON VLD or VST instruction.
  SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
                        bool is64BitVector);

  /// Checks if N is a multiplication by a constant where we can extract out a
  /// power of two from the constant so that it can be used in a shift, but only
  /// if it simplifies the materialization of the constant. Returns true if it
  /// is, and assigns to PowerOfTwo the power of two that should be extracted
  /// out and to NewMulConst the new constant to be multiplied by.
  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;

  /// Replace N with M in CurDAG, in a way that also ensures that M gets
  /// selected when N would have been selected.
  void replaceDAGValue(const SDValue &N, SDValue M);
}; // end class ARMDAGToDAGISel
} // end anonymous namespace
325 
326 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
327 /// operand. If so Imm will receive the 32-bit value.
328 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
329   if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
330     Imm = cast<ConstantSDNode>(N)->getZExtValue();
331     return true;
332   }
333   return false;
334 }
335 
// isInt32Immediate - This method tests to see if the value is a 32-bit
// constant operand. If so Imm will receive the 32 bit value.
// Convenience overload forwarding to the SDNode* version above.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}
341 
342 // isOpcWithIntImmediate - This method tests to see if the node is a specific
343 // opcode and that it has a immediate integer right operand.
344 // If so Imm will receive the 32 bit value.
345 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
346   return N->getOpcode() == Opc &&
347          isInt32Immediate(N->getOperand(1).getNode(), Imm);
348 }
349 
350 /// Check whether a particular node is a constant value representable as
351 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
352 ///
353 /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
354 static bool isScaledConstantInRange(SDValue Node, int Scale,
355                                     int RangeMin, int RangeMax,
356                                     int &ScaledConstant) {
357   assert(Scale > 0 && "Invalid scale!");
358 
359   // Check that this is a constant.
360   const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
361   if (!C)
362     return false;
363 
364   ScaledConstant = (int) C->getZExtValue();
365   if ((ScaledConstant % Scale) != 0)
366     return false;
367 
368   ScaledConstant /= Scale;
369   return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
370 }
371 
/// Pre-isel DAG rewrite: reshape (add X1, (and (srl X2, c1), c2)) so the
/// and/srl pair can later be selected as a UBFX and the shift folded into the
/// add's shifter operand. Only runs when UBFX is available (v6T2+).
void ARMDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->hasV6T2Ops())
    return;

  // Thanks to the hasV6T2Ops() guard above, any Thumb target reaching here is
  // Thumb2-capable.
  bool isThumb2 = Subtarget->isThumb();
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.

    if (N->getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    // of trailing zeros of c2. The left shift would be folded as an shifter
    // operand of 'add' and the 'and' and 'srl' would become a bits extraction
    // node (UBFX).

    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    unsigned And_imm = 0;
    // Canonicalize so the AND-with-immediate (if any) ends up in N1.
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = countTrailingZeros(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free. e.g. On
      // Swift, left shifter operand of 1 / 2 for free but others are not.
      // e.g.
      //  ubfx   r3, r1, #16, #8
      //  ldr.w  r3, [r0, r3, lsl #2]
      // vs.
      //  mov.w  r9, #1020
      //  and.w  r2, r9, r1, lsr #14
      //  ldr    r2, [r0, r2]
      continue;
    And_imm >>= TZ;
    // After shifting out the trailing zeros the mask must be of the form
    // 0..01..1 (i.e. mask+1 is a power of two), or this isn't a bitfield.
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure first operand is not a shifter operand which would prevent
    // folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation.
    // Fold the trailing-zero shift of the mask into the srl amount, re-AND
    // with the reduced mask, and re-apply the shift as an explicit SHL that
    // the ADD can later fold as a shifter operand.
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
                                              MVT::i32));
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         Srl,
                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    CurDAG->UpdateNodeOperands(N, N0, N1);
  }
}
454 
/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
  // Don't bother with hazard analysis when not optimizing.
  if (OptLevel == CodeGenOpt::None)
    return true;

  // Only relevant on subtargets that actually suffer VMLx hazards.
  if (!Subtarget->hasVMLxHazards())
    return true;

  // With multiple users we can't reason about the single consumer below.
  if (!N->hasOneUse())
    return false;

  SDNode *Use = *N->use_begin();
  if (Use->getOpcode() == ISD::CopyToReg)
    return true;
  if (Use->isMachineOpcode()) {
    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
        CurDAG->getSubtarget().getInstrInfo());

    const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
    // Stores and moves to GPRs don't create the RAW hazard.
    if (MCID.mayStore())
      return true;
    unsigned Opcode = MCID.getOpcode();
    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
      return true;
    // vmlx feeding into another vmlx. We actually want to unfold
    // the use later in the MLxExpansion pass. e.g.
    // vmla
    // vmla (stall 8 cycles)
    //
    // vmul (5 cycles)
    // vadd (5 cycles)
    // vmla
    // This adds up to about 18 - 19 cycles.
    //
    // vmla
    // vmul (stall 4 cycles)
    // vadd adds up to about 14 cycles.
    return TII->isFpMLxInstruction(Opcode);
  }

  return false;
}
499 
500 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
501                                             ARM_AM::ShiftOpc ShOpcVal,
502                                             unsigned ShAmt) {
503   if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
504     return true;
505   if (Shift.hasOneUse())
506     return true;
507   // R << 2 is free.
508   return ShOpcVal == ARM_AM::lsl &&
509          (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
510 }
511 
512 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
513                                              unsigned MaxShift,
514                                              unsigned &PowerOfTwo,
515                                              SDValue &NewMulConst) const {
516   assert(N.getOpcode() == ISD::MUL);
517   assert(MaxShift > 0);
518 
519   // If the multiply is used in more than one place then changing the constant
520   // will make other uses incorrect, so don't.
521   if (!N.hasOneUse()) return false;
522   // Check if the multiply is by a constant
523   ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
524   if (!MulConst) return false;
525   // If the constant is used in more than one place then modifying it will mean
526   // we need to materialize two constants instead of one, which is a bad idea.
527   if (!MulConst->hasOneUse()) return false;
528   unsigned MulConstVal = MulConst->getZExtValue();
529   if (MulConstVal == 0) return false;
530 
531   // Find the largest power of 2 that MulConstVal is a multiple of
532   PowerOfTwo = MaxShift;
533   while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
534     --PowerOfTwo;
535     if (PowerOfTwo == 0) return false;
536   }
537 
538   // Only optimise if the new cost is better
539   unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
540   NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
541   unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget);
542   unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget);
543   return NewCost < OldCost;
544 }
545 
/// Replace N with M in CurDAG, in a way that also ensures that M gets
/// selected when N would have been selected.
void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
  // Move M to N's slot in the node ordering before rewriting the uses, so the
  // selector's worklist position still covers M.
  CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
  ReplaceUses(N, M);
}
550 
/// Match N as a register shifted by an immediate (shifter operand).
/// On success BaseReg gets the value being shifted and Opc the encoded
/// shift-op/amount. Note: CheckProfitability is accepted for interface
/// symmetry with SelectRegShifterOperand but is not consulted here.
bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  // If N is a multiply-by-constant and it's profitable to extract a shift and
  // use it in a shifted operand do so.
  if (N.getOpcode() == ISD::MUL) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
      // The Handle keeps N alive across the RAUW performed by
      // replaceDAGValue; read it back afterwards.
      HandleSDNode Handle(N);
      SDLoc Loc(N);
      replaceDAGValue(N.getOperand(1), NewMulConst);
      BaseReg = Handle.getValue();
      Opc = CurDAG->getTargetConstant(
          ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
      return true;
    }
  }

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  // The shift amount must be an immediate for this form.
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS) return false;
  ShImmVal = RHS->getZExtValue() & 31;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
589 
/// Match N as a register shifted by a register (shifter operand).
/// On success BaseReg gets the shifted value, ShReg the shift-amount
/// register, and Opc the encoded shift operation.
bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &ShReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  // ShImmVal stays 0: register-shifted forms encode no immediate amount.
  unsigned ShImmVal = 0;
  // A constant shift amount belongs to the immediate form handled by
  // SelectImmShifterOperand, so reject it here.
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (RHS) return false;

  ShReg = N.getOperand(1);
  if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
    return false;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
616 
617 // Determine whether an ISD::OR's operands are suitable to turn the operation
618 // into an addition, which often has more compact encodings.
619 bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
620   assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
621   Out = N;
622   return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
623 }
624 
625 
/// Match N as a base register plus a signed 12-bit immediate offset
/// (AddrModeImm12). Always succeeds: falls back to base-only with a zero
/// offset when no immediate can be folded.
bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
                                          SDValue &Base,
                                          SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    // Look through ARMISD::Wrapper, but not for the target-symbol operands
    // listed below.
    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else
      Base = N;
    OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  // ADD/SUB/or-as-add with a constant RHS: fold it when it fits in 12 bits.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    // A subtract is encoded as a negative offset.
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
      Base   = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
676 
677 
678 
/// Match N as an AddrMode2 register-offset operand for loads/stores:
/// base register +/- (offset register, optionally shifted). Returns false
/// for forms better handled as base + imm12 (LDRi12).
bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  // On cores where shifter operands aren't free (A9-like, Swift), only
  // rewrite a multiply that has a single use.
  if (N.getOpcode() == ISD::MUL &&
      ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = - RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          // Base and Offset are the same register: X + (X << ShAmt).
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ISD::ADD.
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave simple R +/- imm12 operands for LDRi12
  if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) // 12 bits.
      return false;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
  ARM_AM::ShiftOpc ShOpcVal =
    ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base   = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh =
           dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        // Unprofitable: fall back to using the unshifted RHS as the offset.
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
          dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          // Swap roles: the shifted LHS becomes the offset.
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  // If Offset is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
      // Handle keeps Offset alive across the RAUW done by replaceDAGValue.
      HandleSDNode Handle(Offset);
      replaceDAGValue(Offset.getOperand(1), NewMulConst);
      Offset = Handle.getValue();
      ShAmt = PowerOfTwo;
      ShOpcVal = ARM_AM::lsl;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
786 
/// SelectAddrMode2OffsetReg - Select a register (possibly shifted) offset
/// operand for a pre/post-indexed AM2 load/store. Returns false when the
/// offset is a plain constant in [0, 0x1000), which the immediate-offset
/// selectors handle instead.
bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  // The direction (add vs. sub) comes from the memory node's indexed
  // addressing mode, not from the offset value itself.
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
    return false; // Leave constant offsets to the *OffsetImm selectors.

  Offset = N;
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
  unsigned ShAmt = 0;
  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
        Offset = N.getOperand(0);
      else {
        // Not profitable to fold: use the unshifted value as the offset.
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
822 
823 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
824                                             SDValue &Offset, SDValue &Opc) {
825   unsigned Opcode = Op->getOpcode();
826   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
827     ? cast<LoadSDNode>(Op)->getAddressingMode()
828     : cast<StoreSDNode>(Op)->getAddressingMode();
829   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
830     ? ARM_AM::add : ARM_AM::sub;
831   int Val;
832   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
833     if (AddSub == ARM_AM::sub) Val *= -1;
834     Offset = CurDAG->getRegister(0, MVT::i32);
835     Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
836     return true;
837   }
838 
839   return false;
840 }
841 
842 
843 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
844                                             SDValue &Offset, SDValue &Opc) {
845   unsigned Opcode = Op->getOpcode();
846   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
847     ? cast<LoadSDNode>(Op)->getAddressingMode()
848     : cast<StoreSDNode>(Op)->getAddressingMode();
849   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
850     ? ARM_AM::add : ARM_AM::sub;
851   int Val;
852   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
853     Offset = CurDAG->getRegister(0, MVT::i32);
854     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
855                                                       ARM_AM::no_shift),
856                                     SDLoc(Op), MVT::i32);
857     return true;
858   }
859 
860   return false;
861 }
862 
/// SelectAddrOffsetNone - Match an address with no offset: the operand itself
/// is used as the base unconditionally.
bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}
867 
/// SelectAddrMode3 - Select base register, offset register and opcode
/// immediate for an addressing-mode-3 access: base +/- reg or
/// base +/- imm8. Always succeeds.
bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::SUB) {
    // X - C  is canonicalize to X + -C, no need to handle it here.
    Base = N.getOperand(0);
    Offset = N.getOperand(1);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    // Not base+constant: use the whole value as the base with a zero offset.
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                              -256 + 1, 256, RHSC)) { // 8 bits.
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);

    // The sign is encoded separately, so a negative constant becomes a
    // subtract of its magnitude.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // Constant out of imm8 range: fall back to the register-register form.
  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                  MVT::i32);
  return true;
}
921 
922 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
923                                             SDValue &Offset, SDValue &Opc) {
924   unsigned Opcode = Op->getOpcode();
925   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
926     ? cast<LoadSDNode>(Op)->getAddressingMode()
927     : cast<StoreSDNode>(Op)->getAddressingMode();
928   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
929     ? ARM_AM::add : ARM_AM::sub;
930   int Val;
931   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
932     Offset = CurDAG->getRegister(0, MVT::i32);
933     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
934                                     MVT::i32);
935     return true;
936   }
937 
938   Offset = N;
939   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
940                                   MVT::i32);
941   return true;
942 }
943 
/// IsAddressingMode5 - Shared matcher behind SelectAddrMode5 /
/// SelectAddrMode5FP16: base register plus a scaled signed 8-bit immediate
/// folded into the Offset operand. FP16 selects the AM5FP16 encoding with
/// scale 2 instead of the standard AM5 encoding with scale 4. Always
/// succeeds (falls back to base + zero offset).
bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
                                        bool FP16) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      // Unwrap wrapped targets, but keep the wrapper around global/TLS/
      // external symbols.
      Base = N.getOperand(0);
    }
    // NOTE(review): this branch uses the plain AM5 encoding even when FP16 is
    // set; presumably a zero offset encodes identically in both — confirm.
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  const int Scale = FP16 ? 2 : 4;

  if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }

    // The sign is carried by AddSub; the encoded immediate is the magnitude.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }

    if (FP16)
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);
    else
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);

    return true;
  }

  // Constant doesn't fit: use the whole expression as the base.
  Base = N;

  if (FP16)
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
  else
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);

  return true;
}
1002 
/// SelectAddrMode5 - Addressing mode 5 with the standard (scale-4) encoding.
bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
                                      SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
}
1007 
/// SelectAddrMode5FP16 - Addressing mode 5 with the FP16 (scale-2) encoding.
bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
                                          SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
}
1012 
/// SelectAddrMode6 - Select address and alignment operands (addrmode6) for a
/// vector load/store. The address is passed through unchanged; this only
/// computes the alignment immediate from the parent memory node.
bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Parent);

  if (isa<LSBaseSDNode>(MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
       MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    unsigned MMOAlign = MemN->getAlignment();
    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
    if (MMOAlign >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics.  For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = MemN->getAlignment();
  }

  Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
  return true;
}
1041 
1042 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1043                                             SDValue &Offset) {
1044   LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1045   ISD::MemIndexedMode AM = LdSt->getAddressingMode();
1046   if (AM != ISD::POST_INC)
1047     return false;
1048   Offset = N;
1049   if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1050     if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1051       Offset = CurDAG->getRegister(0, MVT::i32);
1052   }
1053   return true;
1054 }
1055 
1056 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1057                                        SDValue &Offset, SDValue &Label) {
1058   if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1059     Offset = N.getOperand(0);
1060     SDValue N1 = N.getOperand(1);
1061     Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
1062                                       SDLoc(N), MVT::i32);
1063     return true;
1064   }
1065 
1066   return false;
1067 }
1068 
1069 
1070 //===----------------------------------------------------------------------===//
1071 //                         Thumb Addressing Modes
1072 //===----------------------------------------------------------------------===//
1073 
1074 static bool shouldUseZeroOffsetLdSt(SDValue N) {
1075   // Negative numbers are difficult to materialise in thumb1. If we are
1076   // selecting the add of a negative, instead try to select ri with a zero
1077   // offset, so create the add node directly which will become a sub.
1078   if (N.getOpcode() != ISD::ADD)
1079     return false;
1080 
1081   // Look for an imm which is not legal for ld/st, but is legal for sub.
1082   if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
1083     return C->getSExtValue() < 0 && C->getSExtValue() >= -255;
1084 
1085   return false;
1086 }
1087 
1088 bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
1089                                                 SDValue &Offset) {
1090   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1091     ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
1092     if (!NC || !NC->isNullValue())
1093       return false;
1094 
1095     Base = Offset = N;
1096     return true;
1097   }
1098 
1099   Base = N.getOperand(0);
1100   Offset = N.getOperand(1);
1101   return true;
1102 }
1103 
/// SelectThumbAddrModeRR - Reg+reg form, but defer base+negative-imm adds to
/// the ri selector (see shouldUseZeroOffsetLdSt).
bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
                                            SDValue &Offset) {
  if (shouldUseZeroOffsetLdSt(N))
    return false; // Select ri instead
  return SelectThumbAddrModeRRSext(N, Base, Offset);
}
1110 
/// SelectThumbAddrModeImm5S - Select a thumb base + scaled 5-bit unsigned
/// immediate address. Scale is the access size in bytes; the constant,
/// divided by Scale, must lie in [0, 32). Fails (so the reg+reg selector
/// runs) for plain adds and out-of-range offsets.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                          SDValue &Base, SDValue &OffImm) {
  if (shouldUseZeroOffsetLdSt(N)) {
    // base + negative-imm: select the base alone so the add becomes a sub.
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::ADD) {
      return false; // We want to select register offset instead
    } else if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      // Unwrap plain wrapped targets; symbol-like targets keep the wrapper.
      Base = N.getOperand(0);
    } else {
      Base = N;
    }

    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is + imm5 * scale, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
    Base = N.getOperand(0);
    OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  // Offset is too large, so use register offset instead.
  return false;
}
1148 
/// Imm5 address scaled by 4 (word accesses).
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
}
1154 
/// Imm5 address scaled by 2 (halfword accesses).
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
}
1160 
/// Imm5 address scaled by 1 (byte accesses).
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
}
1166 
/// SelectThumbAddrModeSP - Select a frame-index (SP-relative) address:
/// either a bare frame index or frame index + word-scaled imm8. May raise a
/// frame object's alignment to 4 so the offset stays a multiple of 4.
bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    if (MFI.getObjectAlignment(FI) < 4)
      MFI.setObjectAlignment(FI, 4);
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
      Base = N.getOperand(0);
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      // Make sure the offset is inside the object, or we might fail to
      // allocate an emergency spill slot. (An out-of-range access is UB, but
      // it could show up anyway.)
      MachineFrameInfo &MFI = MF->getFrameInfo();
      if (RHSC * 4 < MFI.getObjectSize(FI)) {
        // For LHS+RHS to result in an offset that's a multiple of 4 the object
        // indexed by the LHS must be 4-byte aligned.
        if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlignment(FI) < 4)
          MFI.setObjectAlignment(FI, 4);
        if (MFI.getObjectAlignment(FI) >= 4) {
          Base = CurDAG->getTargetFrameIndex(
              FI, TLI->getPointerTy(CurDAG->getDataLayout()));
          OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  return false;
}
1212 
1213 template <unsigned Shift>
1214 bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base,
1215                                           SDValue &OffImm) {
1216   if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
1217     int RHSC;
1218     if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
1219                                 RHSC)) {
1220       Base = N.getOperand(0);
1221       if (N.getOpcode() == ISD::SUB)
1222         RHSC = -RHSC;
1223       OffImm =
1224           CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
1225       return true;
1226     }
1227   }
1228 
1229   // Base only.
1230   Base = N;
1231   OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1232   return true;
1233 }
1234 
1235 
1236 //===----------------------------------------------------------------------===//
1237 //                        Thumb 2 Addressing Modes
1238 //===----------------------------------------------------------------------===//
1239 
1240 
/// SelectT2AddrModeImm12 - Select a Thumb2 base + unsigned imm12 address.
/// Negative offsets are deferred to the imm8 selector (t2LDRi8) and
/// constant-pool wrappers to t2LDRpci; anything else falls back to
/// base-only with a zero immediate.
bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::TargetConstantPool)
        return false;  // We want to select t2LDRpci instead.
    } else
      Base = N;
    OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    if (SelectT2AddrModeImm8(N, Base, OffImm))
      // Let t2LDRi8 handle (R - imm8).
      return false;

    int RHSC = (int)RHS->getZExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base   = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
1296 
/// SelectT2AddrModeImm8 - Select a Thumb2 base + negative imm8 address.
/// Only constants in [-255, -1] (after folding a SUB's sign) are matched;
/// non-negative offsets belong to the imm12 form.
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
                                           SDValue &Base, SDValue &OffImm) {
  // Match simple R - imm8 operands.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}
1323 
1324 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1325                                                  SDValue &OffImm){
1326   unsigned Opcode = Op->getOpcode();
1327   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1328     ? cast<LoadSDNode>(Op)->getAddressingMode()
1329     : cast<StoreSDNode>(Op)->getAddressingMode();
1330   int RHSC;
1331   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1332     OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1333       ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
1334       : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1335     return true;
1336   }
1337 
1338   return false;
1339 }
1340 
1341 template <unsigned Shift>
1342 bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,
1343                                            SDValue &OffImm) {
1344   if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
1345     int RHSC;
1346     if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
1347                                 RHSC)) {
1348       Base = N.getOperand(0);
1349       if (Base.getOpcode() == ISD::FrameIndex) {
1350         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1351         Base = CurDAG->getTargetFrameIndex(
1352             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1353       }
1354 
1355       if (N.getOpcode() == ISD::SUB)
1356         RHSC = -RHSC;
1357       OffImm =
1358           CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
1359       return true;
1360     }
1361   }
1362 
1363   // Base only.
1364   Base = N;
1365   OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1366   return true;
1367 }
1368 
/// Template entry point: forwards the compile-time Shift argument to the
/// runtime-Shift implementation below.
template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm) {
  return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
}
1374 
/// SelectT2AddrModeImm7Offset - Select the pre/post-increment offset of an
/// indexed (plain or masked) load/store as an unsigned 7-bit immediate
/// scaled by 1 << Shift; the sign is taken from the addressing mode.
bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm,
                                                 unsigned Shift) {
  // The addressing mode lives on the specific memory node type.
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM;
  switch (Opcode) {
  case ISD::LOAD:
    AM = cast<LoadSDNode>(Op)->getAddressingMode();
    break;
  case ISD::STORE:
    AM = cast<StoreSDNode>(Op)->getAddressingMode();
    break;
  case ISD::MLOAD:
    AM = cast<MaskedLoadSDNode>(Op)->getAddressingMode();
    break;
  case ISD::MSTORE:
    AM = cast<MaskedStoreSDNode>(Op)->getAddressingMode();
    break;
  default:
    llvm_unreachable("Unexpected Opcode for Imm7Offset");
  }

  int RHSC;
  // 7 bit constant, shifted by Shift.
  if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) {
    OffImm =
        ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
            ? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32)
            : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N),
                                        MVT::i32);
    return true;
  }
  return false;
}
1409 
1410 template <int Min, int Max>
1411 bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) {
1412   int Val;
1413   if (isScaledConstantInRange(N, 1, Min, Max, Val)) {
1414     OffImm = CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i32);
1415     return true;
1416   }
1417   return false;
1418 }
1419 
/// SelectT2AddrModeSoReg - Select a Thumb2 register + shifted-register
/// address: (R + R) or (R + (R << [1,2,3])). Constant offsets are left for
/// the imm12/imm8 selectors.
bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                            SDValue &Base,
                                            SDValue &OffReg, SDValue &ShImm) {
  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
      return false;
    else if (RHSC < 0 && RHSC >= -255) // 8 bits
      return false;
  }

  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base   = N.getOperand(0);
  OffReg = N.getOperand(1);

  // Swap if it is ((R << c) + R).
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
  if (ShOpcVal != ARM_AM::lsl) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
    if (ShOpcVal == ARM_AM::lsl)
      std::swap(Base, OffReg);
  }

  if (ShOpcVal == ARM_AM::lsl) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      // Only shifts of 1..3 fit this addressing mode.
      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
        OffReg = OffReg.getOperand(0);
      else {
        ShAmt = 0;
      }
    }
  }

  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(OffReg);
      replaceDAGValue(OffReg.getOperand(1), NewMulConst);
      OffReg = Handle.getValue();
      ShAmt = PowerOfTwo;
    }
  }

  ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);

  return true;
}
1479 
/// SelectT2AddrModeExclusive - Select base + word-scaled offset for ldrex /
/// strex. Always succeeds, at worst with a zero offset, because these
/// instructions have no alternative addressing form.
bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
                                                SDValue &OffImm) {
  // This *must* succeed since it's used for the irreplaceable ldrex and strex
  // instructions.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);

  if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
    return true;

  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS)
    return true;

  // The offset is stored in words, so it must be a multiple of 4 no larger
  // than 255*4.
  uint32_t RHSC = (int)RHS->getZExtValue();
  if (RHSC > 1020 || RHSC % 4 != 0)
    return true;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
  }

  OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
  return true;
}
1508 
1509 //===--------------------------------------------------------------------===//
1510 
1511 /// getAL - Returns a ARMCC::AL immediate node.
/// getAL - Returns a ARMCC::AL ("always") condition-code immediate node,
/// used as the predicate operand of unconditional instructions.
static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
  return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
}
1515 
/// transferMemOperands - Copy the memory operand from the original memory
/// node N onto the newly created machine node Result.
void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}
1520 
/// tryARMIndexedLoad - Try to select a pre/post-indexed ARM-mode load for N.
/// Returns true (after replacing N) if one of the LDR/LDRB/LDRH/LDRSB/LDRSH
/// pre/post-indexed variants matched, false to let normal selection proceed.
bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  SDValue Offset, AMOpc;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  // Word loads: try the immediate forms first, then a register offset.
  if (LoadedVT == MVT::i32 && isPre &&
      SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_PRE_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 && !isPre &&
      SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_POST_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 &&
      SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
    Match = true;

  } else if (LoadedVT == MVT::i16 &&
             SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
    // Halfword loads use addressing mode 3; pick the sign- or zero-extending
    // opcode from the load's extension type.
    Match = true;
    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
      ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
      : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
    // Byte loads: sign-extending uses AM3 (LDRSB); zero-extending uses AM2
    // (LDRB) with the same imm-pre/imm-post/reg preference as word loads.
    if (LD->getExtensionType() == ISD::SEXTLOAD) {
      if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
      }
    } else {
      if (isPre &&
          SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_PRE_IMM;
      } else if (!isPre &&
                  SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_POST_IMM;
      } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
      }
    }
  }

  if (Match) {
    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
      // The pre-indexed immediate forms take no offset-register operand.
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    } else {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    }
  }

  return false;
}
1599 
1600 bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
1601   LoadSDNode *LD = cast<LoadSDNode>(N);
1602   EVT LoadedVT = LD->getMemoryVT();
1603   ISD::MemIndexedMode AM = LD->getAddressingMode();
1604   if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
1605       LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
1606     return false;
1607 
1608   auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
1609   if (!COffs || COffs->getZExtValue() != 4)
1610     return false;
1611 
1612   // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
1613   // The encoding of LDM is not how the rest of ISel expects a post-inc load to
1614   // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
1615   // ISel.
1616   SDValue Chain = LD->getChain();
1617   SDValue Base = LD->getBasePtr();
1618   SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
1619                    CurDAG->getRegister(0, MVT::i32), Chain };
1620   SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
1621                                        MVT::i32, MVT::Other, Ops);
1622   transferMemOperands(N, New);
1623   ReplaceNode(N, New);
1624   return true;
1625 }
1626 
1627 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1628   LoadSDNode *LD = cast<LoadSDNode>(N);
1629   ISD::MemIndexedMode AM = LD->getAddressingMode();
1630   if (AM == ISD::UNINDEXED)
1631     return false;
1632 
1633   EVT LoadedVT = LD->getMemoryVT();
1634   bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1635   SDValue Offset;
1636   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1637   unsigned Opcode = 0;
1638   bool Match = false;
1639   if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1640     switch (LoadedVT.getSimpleVT().SimpleTy) {
1641     case MVT::i32:
1642       Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1643       break;
1644     case MVT::i16:
1645       if (isSExtLd)
1646         Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1647       else
1648         Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1649       break;
1650     case MVT::i8:
1651     case MVT::i1:
1652       if (isSExtLd)
1653         Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1654       else
1655         Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1656       break;
1657     default:
1658       return false;
1659     }
1660     Match = true;
1661   }
1662 
1663   if (Match) {
1664     SDValue Chain = LD->getChain();
1665     SDValue Base = LD->getBasePtr();
1666     SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1667                      CurDAG->getRegister(0, MVT::i32), Chain };
1668     SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1669                                          MVT::Other, Ops);
1670     transferMemOperands(N, New);
1671     ReplaceNode(N, New);
1672     return true;
1673   }
1674 
1675   return false;
1676 }
1677 
/// Try to select an MVE pre/post-indexed vector load for node \p N, which is
/// either a plain LoadSDNode or a MaskedLoadSDNode. Returns true (and
/// replaces N) on success.
bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
  EVT LoadedVT;
  unsigned Opcode = 0;
  bool isSExtLd, isPre;
  unsigned Align;
  ARMVCC::VPTCodes Pred;
  SDValue PredReg;
  SDValue Chain, Base, Offset;

  // Gather the common fields from either node kind. A plain load is selected
  // unpredicated (ARMVCC::None); a masked load carries its mask as the
  // VPT predicate register with a "then" predicate.
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Align = LD->getAlignment();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    Pred = ARMVCC::None;
    PredReg = CurDAG->getRegister(0, MVT::i32);
  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Align = LD->getAlignment();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    Pred = ARMVCC::Then;
    PredReg = LD->getMask();
  } else
    llvm_unreachable("Expected a Load or a Masked Load!");

  // We allow LE non-masked loads to change the type (for example use a vldrb.8
  // as opposed to a vldrw.32). This can allow extra addressing modes or
  // alignments for what is otherwise an equivalent instruction.
  bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N);

  // Match the memory type (and alignment) against the VLDR variants; the
  // imm7 offset is scaled by the element size (last argument, log2 bytes).
  // Extending loads come first, then the full-width forms.
  SDValue NewOffset;
  if (Align >= 2 && LoadedVT == MVT::v4i16 &&
      SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
  } else if (LoadedVT == MVT::v8i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
  } else if (LoadedVT == MVT::v4i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
  } else if (Align >= 4 &&
             (CanChangeType || LoadedVT == MVT::v4i32 ||
              LoadedVT == MVT::v4f32) &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2))
    Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
  else if (Align >= 2 &&
           (CanChangeType || LoadedVT == MVT::v8i16 ||
            LoadedVT == MVT::v8f16) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1))
    Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
  else if ((CanChangeType || LoadedVT == MVT::v16i8) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0))
    Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
  else
    return false;

  SDValue Ops[] = {Base, NewOffset,
                   CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32), PredReg,
                   Chain};
  // Note the machine node's result order differs from N's: the loaded value
  // is result 1 and the writeback base is result 0, hence the swapped
  // ReplaceUses below.
  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), N->getValueType(0),
                                       MVT::i32, MVT::Other, Ops);
  transferMemOperands(N, New);
  ReplaceUses(SDValue(N, 0), SDValue(New, 1));
  ReplaceUses(SDValue(N, 1), SDValue(New, 0));
  ReplaceUses(SDValue(N, 2), SDValue(New, 2));
  CurDAG->RemoveDeadNode(N);
  return true;
}
1774 
1775 /// Form a GPRPair pseudo register from a pair of GPR regs.
1776 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1777   SDLoc dl(V0.getNode());
1778   SDValue RegClass =
1779     CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1780   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1781   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1782   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1783   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1784 }
1785 
1786 /// Form a D register from a pair of S registers.
1787 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1788   SDLoc dl(V0.getNode());
1789   SDValue RegClass =
1790     CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1791   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1792   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1793   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1794   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1795 }
1796 
1797 /// Form a quad register from a pair of D registers.
1798 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1799   SDLoc dl(V0.getNode());
1800   SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1801                                                MVT::i32);
1802   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1803   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1804   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1805   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1806 }
1807 
1808 /// Form 4 consecutive D registers from a pair of Q registers.
1809 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1810   SDLoc dl(V0.getNode());
1811   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1812                                                MVT::i32);
1813   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1814   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1815   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1816   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1817 }
1818 
1819 /// Form 4 consecutive S registers.
1820 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1821                                    SDValue V2, SDValue V3) {
1822   SDLoc dl(V0.getNode());
1823   SDValue RegClass =
1824     CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1825   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1826   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1827   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1828   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1829   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1830                                     V2, SubReg2, V3, SubReg3 };
1831   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1832 }
1833 
1834 /// Form 4 consecutive D registers.
1835 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1836                                    SDValue V2, SDValue V3) {
1837   SDLoc dl(V0.getNode());
1838   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1839                                                MVT::i32);
1840   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1841   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1842   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1843   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1844   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1845                                     V2, SubReg2, V3, SubReg3 };
1846   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1847 }
1848 
1849 /// Form 4 consecutive Q registers.
1850 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1851                                    SDValue V2, SDValue V3) {
1852   SDLoc dl(V0.getNode());
1853   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1854                                                MVT::i32);
1855   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1856   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1857   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1858   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1859   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1860                                     V2, SubReg2, V3, SubReg3 };
1861   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1862 }
1863 
1864 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1865 /// of a NEON VLD or VST instruction.  The supported values depend on the
1866 /// number of registers being loaded.
1867 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1868                                        unsigned NumVecs, bool is64BitVector) {
1869   unsigned NumRegs = NumVecs;
1870   if (!is64BitVector && NumVecs < 3)
1871     NumRegs *= 2;
1872 
1873   unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1874   if (Alignment >= 32 && NumRegs == 4)
1875     Alignment = 32;
1876   else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1877     Alignment = 16;
1878   else if (Alignment >= 8)
1879     Alignment = 8;
1880   else
1881     Alignment = 0;
1882 
1883   return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1884 }
1885 
1886 static bool isVLDfixed(unsigned Opc)
1887 {
1888   switch (Opc) {
1889   default: return false;
1890   case ARM::VLD1d8wb_fixed : return true;
1891   case ARM::VLD1d16wb_fixed : return true;
1892   case ARM::VLD1d64Qwb_fixed : return true;
1893   case ARM::VLD1d32wb_fixed : return true;
1894   case ARM::VLD1d64wb_fixed : return true;
1895   case ARM::VLD1d64TPseudoWB_fixed : return true;
1896   case ARM::VLD1d64QPseudoWB_fixed : return true;
1897   case ARM::VLD1q8wb_fixed : return true;
1898   case ARM::VLD1q16wb_fixed : return true;
1899   case ARM::VLD1q32wb_fixed : return true;
1900   case ARM::VLD1q64wb_fixed : return true;
1901   case ARM::VLD1DUPd8wb_fixed : return true;
1902   case ARM::VLD1DUPd16wb_fixed : return true;
1903   case ARM::VLD1DUPd32wb_fixed : return true;
1904   case ARM::VLD1DUPq8wb_fixed : return true;
1905   case ARM::VLD1DUPq16wb_fixed : return true;
1906   case ARM::VLD1DUPq32wb_fixed : return true;
1907   case ARM::VLD2d8wb_fixed : return true;
1908   case ARM::VLD2d16wb_fixed : return true;
1909   case ARM::VLD2d32wb_fixed : return true;
1910   case ARM::VLD2q8PseudoWB_fixed : return true;
1911   case ARM::VLD2q16PseudoWB_fixed : return true;
1912   case ARM::VLD2q32PseudoWB_fixed : return true;
1913   case ARM::VLD2DUPd8wb_fixed : return true;
1914   case ARM::VLD2DUPd16wb_fixed : return true;
1915   case ARM::VLD2DUPd32wb_fixed : return true;
1916   }
1917 }
1918 
1919 static bool isVSTfixed(unsigned Opc)
1920 {
1921   switch (Opc) {
1922   default: return false;
1923   case ARM::VST1d8wb_fixed : return true;
1924   case ARM::VST1d16wb_fixed : return true;
1925   case ARM::VST1d32wb_fixed : return true;
1926   case ARM::VST1d64wb_fixed : return true;
1927   case ARM::VST1q8wb_fixed : return true;
1928   case ARM::VST1q16wb_fixed : return true;
1929   case ARM::VST1q32wb_fixed : return true;
1930   case ARM::VST1q64wb_fixed : return true;
1931   case ARM::VST1d64TPseudoWB_fixed : return true;
1932   case ARM::VST1d64QPseudoWB_fixed : return true;
1933   case ARM::VST2d8wb_fixed : return true;
1934   case ARM::VST2d16wb_fixed : return true;
1935   case ARM::VST2d32wb_fixed : return true;
1936   case ARM::VST2q8PseudoWB_fixed : return true;
1937   case ARM::VST2q16PseudoWB_fixed : return true;
1938   case ARM::VST2q32PseudoWB_fixed : return true;
1939   }
1940 }
1941 
// Get the register stride update opcode of a VLD/VST instruction that
// is otherwise equivalent to the given fixed stride updating instruction.
// The mapping is a pure opcode-to-opcode table; opcodes not listed (the
// default path) are returned unchanged.
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
  assert((isVLDfixed(Opc) || isVSTfixed(Opc))
    && "Incorrect fixed stride updating instruction.");
  // NOTE(review): ARM::VLD1d64Twb_fixed is mapped below but is not listed in
  // isVLDfixed(), so the assert above would reject it — confirm intended.
  switch (Opc) {
  default: break;
  // VLD1 (single D / Q registers and multi-register pseudos).
  case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
  case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
  case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
  case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
  case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
  case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
  case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
  case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
  case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
  case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
  case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
  case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
  // VLD1DUP (load one element to all lanes).
  case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
  case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
  case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
  case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
  case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
  case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;

  // VST1.
  case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
  case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
  case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
  case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
  case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
  case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
  case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
  case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
  case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
  case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;

  // VLD2.
  case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
  case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
  case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
  case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
  case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
  case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;

  // VST2.
  case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
  case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
  case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
  case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
  case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
  case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;

  // VLD2DUP.
  case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
  case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
  case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
  }
  return Opc; // If not one we handle, return it unchanged.
}
1999 
2000 /// Returns true if the given increment is a Constant known to be equal to the
2001 /// access size performed by a NEON load/store. This means the "[rN]!" form can
2002 /// be used.
2003 static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
2004   auto C = dyn_cast<ConstantSDNode>(Inc);
2005   return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
2006 }
2007 
/// Select a NEON VLD1-VLD4 (optionally updating) for node \p N. The three
/// opcode tables are indexed by element size (8/16/32/64): DOpcodes for
/// double-register forms, QOpcodes0/QOpcodes1 for the quad-register forms
/// (the latter pair used for the two-instruction VLD3/VLD4 sequence).
void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
                                   // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  // Map the element size onto the opcode-table index.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  // Multi-vector loads produce one wide super-register value; individual
  // vectors are extracted as subregisters afterwards. VLD3 is rounded up to
  // a 4-register type because there is no 3-register value type.
  EVT ResTy;
  if (NumVecs == 1)
    ResTy = VT;
  else {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
  }
  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SDNode *VLd;
  SmallVector<SDValue, 7> Ops;

  // Double registers and VLD1/VLD2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVLDfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
      // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      } else if (!isVLDfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  } else {
    // Otherwise, quad registers are loaded with two separate instructions,
    // where one loads the even registers and the other loads the odd registers.
    EVT AddrTy = MemAddr.getValueType();

    // Load the even subregs.  This is always an updating load, so that it
    // provides the address to the second load for the odd subregs.
    SDValue ImplDef =
      SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                          ResTy, AddrTy, MVT::Other, OpsA);
    Chain = SDValue(VLdA, 2);

    // Load the odd subregs, using the updated address (result 1) and chaining
    // in the even-subregs result (result 0) as the tied source.
    Ops.push_back(SDValue(VLdA, 1));
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      assert(isa<ConstantSDNode>(Inc.getNode()) &&
             "only constant post-increment update allowed for VLD3/4");
      (void)Inc;
      Ops.push_back(Reg0);
    }
    Ops.push_back(SDValue(VLdA, 0));
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
  }

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});

  if (NumVecs == 1) {
    ReplaceNode(N, VLd);
    return;
  }

  // Extract out the subregisters.
  SDValue SuperReg = SDValue(VLd, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                    ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
  CurDAG->RemoveDeadNode(N);
}
2146 
/// Select a NEON VST1-VST4 (optionally updating) for node \p N. Opcode
/// tables are indexed by element size, mirroring SelectVLD: DOpcodes for
/// double-register forms, QOpcodes0/QOpcodes1 for the quad-register forms
/// (the latter pair for the two-instruction VST3/VST4 sequence).
void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
                                   // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  // Map the element size onto the opcode-table index.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vst type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  std::vector<EVT> ResTys;
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SmallVector<SDValue, 7> Ops;

  // Double registers and VST1/VST2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    // Gather the source vectors into one super-register via REG_SEQUENCE so
    // the register allocator assigns consecutive registers.
    SDValue SrcReg;
    if (NumVecs == 1) {
      SrcReg = N->getOperand(Vec0Idx);
    } else if (is64BitVector) {
      // Form a REG_SEQUENCE to force register allocation.
      SDValue V0 = N->getOperand(Vec0Idx + 0);
      SDValue V1 = N->getOperand(Vec0Idx + 1);
      if (NumVecs == 2)
        SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
      else {
        SDValue V2 = N->getOperand(Vec0Idx + 2);
        // If it's a vst3, form a quad D-register and leave the last part as
        // an undef.
        SDValue V3 = (NumVecs == 3)
          ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
          : N->getOperand(Vec0Idx + 3);
        SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
      }
    } else {
      // Form a QQ register.
      SDValue Q0 = N->getOperand(Vec0Idx);
      SDValue Q1 = N->getOperand(Vec0Idx + 1);
      SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
    }

    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVSTfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
      }
      // VST1/VST2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      else if (!isVSTfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(SrcReg);
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

    // Transfer memoperands.
    CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});

    ReplaceNode(N, VSt);
    return;
  }

  // Otherwise, quad registers are stored with two separate instructions,
  // where one stores the even registers and the other stores the odd registers.

  // Form the QQQQ REG_SEQUENCE.
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  SDValue V2 = N->getOperand(Vec0Idx + 2);
  SDValue V3 = (NumVecs == 3)
    ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
    : N->getOperand(Vec0Idx + 3);
  SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);

  // Store the even D registers.  This is always an updating store, so that it
  // provides the address to the second store for the odd subregs.
  const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
  SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                        MemAddr.getValueType(),
                                        MVT::Other, OpsA);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
  Chain = SDValue(VStA, 1);

  // Store the odd D registers, using the updated address produced by the
  // first store (result 0).
  Ops.push_back(SDValue(VStA, 0));
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    assert(isa<ConstantSDNode>(Inc.getNode()) &&
           "only constant post-increment update allowed for VST3/4");
    (void)Inc;
    Ops.push_back(Reg0);
  }
  Ops.push_back(RegSeq);
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);
  SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
                                        Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
  ReplaceNode(N, VStB);
}
2297 
/// Select a NEON load/store-lane intrinsic (vld2lane..vld4lane /
/// vst2lane..vst4lane): NumVecs vectors each contribute one lane at a
/// constant lane index. DOpcodes/QOpcodes are the machine opcodes for
/// 64-bit and 128-bit vector operands respectively, indexed by element size.
void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                                      unsigned NumVecs,
                                      const uint16_t *DOpcodes,
                                      const uint16_t *QOpcodes) {
  assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
                                   // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  // The constant lane index follows the vector operands.
  unsigned Lane =
    cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();

  // Clamp the alignment operand to a power of two no larger than the total
  // transfer size (the 3-vector forms use no alignment operand at all).
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  // Element size selects the row in the opcode tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld/vst lane type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
    // Quad-register operations:
  case MVT::v8f16:
  case MVT::v8i16: OpcodeIndex = 0; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 1; break;
  }

  // Loads produce one wide tuple value (modelled as a vector of i64) which
  // is split into the individual vectors below; NumVecs == 3 still uses a
  // 4-register tuple type.
  std::vector<EVT> ResTys;
  if (IsLoad) {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
                                      MVT::i64, ResTyElts));
  }
  if (isUpdating)
    ResTys.push_back(MVT::i32); // written-back address
  ResTys.push_back(MVT::Other); // chain

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    // A "perfect" increment (matching the access size) is implicit in the
    // instruction and encoded as Reg0; otherwise pass the register increment.
    bool IsImmUpdate =
        isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
    Ops.push_back(IsImmUpdate ? Reg0 : Inc);
  }

  // Combine the input vectors into a single super-register tuple operand.
  SDValue SuperReg;
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  if (NumVecs == 2) {
    if (is64BitVector)
      SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
    else
      SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
  } else {
    SDValue V2 = N->getOperand(Vec0Idx + 2);
    // For NumVecs == 3 the fourth tuple register is a don't-care.
    SDValue V3 = (NumVecs == 3)
      ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
      : N->getOperand(Vec0Idx + 3);
    if (is64BitVector)
      SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
    else
      SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
  }
  Ops.push_back(SuperReg);
  Ops.push_back(getI32Imm(Lane, dl));
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                                  QOpcodes[OpcodeIndex]);
  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
  if (!IsLoad) {
    ReplaceNode(N, VLdLn);
    return;
  }

  // Extract the subregisters.
  SuperReg = SDValue(VLdLn, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                    ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  // Result NumVecs is the chain; NumVecs+1 (updating forms only) is the
  // written-back address.
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
  CurDAG->RemoveDeadNode(N);
}
2424 
2425 template <typename SDValueVector>
2426 void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2427                                            SDValue PredicateMask) {
2428   Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
2429   Ops.push_back(PredicateMask);
2430 }
2431 
2432 template <typename SDValueVector>
2433 void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2434                                            SDValue PredicateMask,
2435                                            SDValue Inactive) {
2436   Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
2437   Ops.push_back(PredicateMask);
2438   Ops.push_back(Inactive);
2439 }
2440 
2441 template <typename SDValueVector>
2442 void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) {
2443   Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
2444   Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2445 }
2446 
2447 template <typename SDValueVector>
2448 void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2449                                                 EVT InactiveTy) {
2450   Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
2451   Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2452   Ops.push_back(SDValue(
2453       CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, InactiveTy), 0));
2454 }
2455 
2456 void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes,
2457                                    bool Predicated) {
2458   SDLoc Loc(N);
2459   SmallVector<SDValue, 8> Ops;
2460 
2461   uint16_t Opcode;
2462   switch (N->getValueType(1).getVectorElementType().getSizeInBits()) {
2463   case 32:
2464     Opcode = Opcodes[0];
2465     break;
2466   case 64:
2467     Opcode = Opcodes[1];
2468     break;
2469   default:
2470     llvm_unreachable("bad vector element size in SelectMVE_WB");
2471   }
2472 
2473   Ops.push_back(N->getOperand(2)); // vector of base addresses
2474 
2475   int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
2476   Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate offset
2477 
2478   if (Predicated)
2479     AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
2480   else
2481     AddEmptyMVEPredicateToOps(Ops, Loc);
2482 
2483   Ops.push_back(N->getOperand(0)); // chain
2484 
2485   CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
2486 }
2487 
2488 void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,
2489                                           bool Immediate,
2490                                           bool HasSaturationOperand) {
2491   SDLoc Loc(N);
2492   SmallVector<SDValue, 8> Ops;
2493 
2494   // Two 32-bit halves of the value to be shifted
2495   Ops.push_back(N->getOperand(1));
2496   Ops.push_back(N->getOperand(2));
2497 
2498   // The shift count
2499   if (Immediate) {
2500     int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
2501     Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
2502   } else {
2503     Ops.push_back(N->getOperand(3));
2504   }
2505 
2506   // The immediate saturation operand, if any
2507   if (HasSaturationOperand) {
2508     int32_t SatOp = cast<ConstantSDNode>(N->getOperand(4))->getZExtValue();
2509     int SatBit = (SatOp == 64 ? 0 : 1);
2510     Ops.push_back(getI32Imm(SatBit, Loc));
2511   }
2512 
2513   // MVE scalar shifts are IT-predicable, so include the standard
2514   // predicate arguments.
2515   Ops.push_back(getAL(CurDAG, Loc));
2516   Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2517 
2518   CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
2519 }
2520 
/// Select an MVE VADC/VSBC (add/subtract with carry). When the incoming
/// carry is a constant that already matches the value the carry-initializing
/// ("I") form would seed, use OpcodeWithNoCarry and drop the explicit
/// carry-in operand; otherwise use OpcodeWithCarry.
void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
                                        uint16_t OpcodeWithNoCarry,
                                        bool Add, bool Predicated) {
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;
  uint16_t Opcode;

  // Predicated intrinsics carry the inactive-lanes vector as operand 1,
  // shifting the real inputs up by one.
  unsigned FirstInputOp = Predicated ? 2 : 1;

  // Two input vectors and the input carry flag
  Ops.push_back(N->getOperand(FirstInputOp));
  Ops.push_back(N->getOperand(FirstInputOp + 1));
  SDValue CarryIn = N->getOperand(FirstInputOp + 2);
  ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(CarryIn);
  // Bit 29 is the carry-flag position in the ARM flags word.
  uint32_t CarryMask = 1 << 29;
  // The add form expects an initial carry of 0, the subtract form an initial
  // carry of 1 (carry bit set).
  uint32_t CarryExpected = Add ? 0 : CarryMask;
  if (CarryInConstant &&
      (CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) {
    Opcode = OpcodeWithNoCarry;
  } else {
    // Non-constant or unexpected carry: pass it explicitly.
    Ops.push_back(CarryIn);
    Opcode = OpcodeWithCarry;
  }

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc,
                         N->getOperand(FirstInputOp + 3),  // predicate
                         N->getOperand(FirstInputOp - 1)); // inactive
  else
    AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
}
2554 
2555 static bool SDValueToConstBool(SDValue SDVal) {
2556   assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant");
2557   ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal);
2558   uint64_t Value = SDValConstant->getZExtValue();
2559   assert((Value == 0 || Value == 1) && "expected value 0 or 1");
2560   return Value;
2561 }
2562 
/// Common selection for the MVE VMLALDAV/VMLSLDAV/VRMLALDAVH/VRMLSLDAVH
/// families. The opcode tables are laid out as groups of `Stride` entries:
/// subtract forms occupy the upper half (offset 4*Stride), exchanged forms
/// the second quarter of each half (offset 2*Stride), accumulating forms are
/// offset by one Stride, and `TySize` indexes the element-size column.
void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
                                            const uint16_t *OpcodesS,
                                            const uint16_t *OpcodesU,
                                            size_t Stride, size_t TySize) {
  assert(TySize < Stride && "Invalid TySize");
  // Operands 1-3 are compile-time flags selecting the instruction variant.
  bool IsUnsigned = SDValueToConstBool(N->getOperand(1));
  bool IsSub = SDValueToConstBool(N->getOperand(2));
  bool IsExchange = SDValueToConstBool(N->getOperand(3));
  if (IsUnsigned) {
    assert(!IsSub &&
           "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist");
    assert(!IsExchange &&
           "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist");
  }

  // Returns true iff operand OpNo is the constant zero.
  auto OpIsZero = [N](size_t OpNo) {
    if (ConstantSDNode *OpConst = dyn_cast<ConstantSDNode>(N->getOperand(OpNo)))
      if (OpConst->getZExtValue() == 0)
        return true;
    return false;
  };

  // If the input accumulator value is not zero, select an instruction with
  // accumulator, otherwise select an instruction without accumulator
  // (the accumulator is passed as two 32-bit operands, 4 and 5).
  bool IsAccum = !(OpIsZero(4) && OpIsZero(5));

  // Walk the opcode table per the layout described above.
  const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS;
  if (IsSub)
    Opcodes += 4 * Stride;
  if (IsExchange)
    Opcodes += 2 * Stride;
  if (IsAccum)
    Opcodes += Stride;
  uint16_t Opcode = Opcodes[TySize];

  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;
  // Push the accumulator operands, if they are used
  if (IsAccum) {
    Ops.push_back(N->getOperand(4));
    Ops.push_back(N->getOperand(5));
  }
  // Push the two vector operands
  Ops.push_back(N->getOperand(6));
  Ops.push_back(N->getOperand(7));

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, N->getOperand(8));
  else
    AddEmptyMVEPredicateToOps(Ops, Loc);

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
}
2616 
2617 void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated,
2618                                         const uint16_t *OpcodesS,
2619                                         const uint16_t *OpcodesU) {
2620   EVT VecTy = N->getOperand(6).getValueType();
2621   size_t SizeIndex;
2622   switch (VecTy.getVectorElementType().getSizeInBits()) {
2623   case 16:
2624     SizeIndex = 0;
2625     break;
2626   case 32:
2627     SizeIndex = 1;
2628     break;
2629   default:
2630     llvm_unreachable("bad vector element size");
2631   }
2632 
2633   SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 2, SizeIndex);
2634 }
2635 
2636 void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated,
2637                                           const uint16_t *OpcodesS,
2638                                           const uint16_t *OpcodesU) {
2639   assert(
2640       N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() ==
2641           32 &&
2642       "bad vector element size");
2643   SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 1, 0);
2644 }
2645 
/// Select an MVE interleaving load intrinsic (e.g. vld2/vld4). Opcodes is
/// indexed first by element size (8/16/32 bits) and then by stage: the load
/// is emitted as NumVecs chained machine instructions that successively fill
/// one wide tuple value, which is then split back into Q registers.
void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                                    const uint16_t *const *Opcodes) {
  EVT VT = N->getValueType(0);
  SDLoc Loc(N);

  // Element size selects the opcode row.
  const uint16_t *OurOpcodes;
  switch (VT.getVectorElementType().getSizeInBits()) {
  case 8:
    OurOpcodes = Opcodes[0];
    break;
  case 16:
    OurOpcodes = Opcodes[1];
    break;
  case 32:
    OurOpcodes = Opcodes[2];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_VLD");
  }

  // The full result tuple is modelled as a single vector of NumVecs*2 i64s.
  EVT DataTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs * 2);
  EVT ResultTys[] = {DataTy, MVT::Other};

  // Start from an undefined tuple; each stage consumes the previous stage's
  // partial result and chain, and produces an updated pair.
  auto Data = SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, DataTy), 0);
  SDValue Chain = N->getOperand(0);
  for (unsigned Stage = 0; Stage < NumVecs; ++Stage) {
    SDValue Ops[] = {Data, N->getOperand(2), Chain};
    auto LoadInst =
        CurDAG->getMachineNode(OurOpcodes[Stage], Loc, ResultTys, Ops);
    Data = SDValue(LoadInst, 0);
    Chain = SDValue(LoadInst, 1);
  }

  // Rewire the intrinsic's results to the Q subregisters of the tuple, and
  // its chain to the last stage's chain.
  for (unsigned i = 0; i < NumVecs; i++)
    ReplaceUses(SDValue(N, i),
                CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT, Data));
  ReplaceUses(SDValue(N, NumVecs), Chain);
  CurDAG->RemoveDeadNode(N);
}
2686 
/// Select a NEON load-and-duplicate (vld1dup..vld4dup): load one element per
/// vector and replicate it across all lanes. DOpcodes covers 64-bit vectors;
/// quad-register forms use QOpcodes0 (and QOpcodes1 for the second
/// instruction when two instructions are needed).
void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
                                   bool isUpdating, unsigned NumVecs,
                                   const uint16_t *DOpcodes,
                                   const uint16_t *QOpcodes0,
                                   const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  // Intrinsic nodes carry the intrinsic ID as operand 1, so the address
  // moves to operand 2.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();

  // Clamp the alignment operand to a power of two no larger than the total
  // transfer size (the 3-vector forms use no alignment operand at all).
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  // Element size selects the row in the opcode tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld-dup type");
  case MVT::v8i8:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v4i16:
  case MVT::v8i16:
  case MVT::v4f16:
  case MVT::v8f16:
                  OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32:
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v1f64:
  case MVT::v1i64: OpcodeIndex = 3; break;
  }

  // The loaded registers are modelled as one wide vector of i64; NumVecs == 3
  // still uses a 4-register tuple type.
  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
  if (!is64BitVector)
    ResTyElts *= 2;
  EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);

  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32); // written-back address
  ResTys.push_back(MVT::Other); // chain

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SDNode *VLdDup;
  if (is64BitVector || NumVecs == 1) {
    // Single-instruction case: all D-register forms, plus the one-vector
    // Q-register form.
    SmallVector<SDValue, 6> Ops;
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex] :
                                   QOpcodes0[OpcodeIndex];
    if (isUpdating) {
      // fixed-stride update instructions don't have an explicit writeback
      // operand. It's implicit in the opcode itself.
      SDValue Inc = N->getOperand(2);
      bool IsImmUpdate =
          isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
      if (NumVecs <= 2 && !IsImmUpdate)
        Opc = getVLDSTRegisterUpdateOpcode(Opc);
      if (!IsImmUpdate)
        Ops.push_back(Inc);
      // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
      else if (NumVecs > 2)
        Ops.push_back(Reg0);
    }
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  } else if (NumVecs == 2) {
    // Two independent instructions; only the chain links them.
    const SDValue OpsA[] = { MemAddr, Align, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
                                          dl, ResTys, OpsA);

    Chain = SDValue(VLdA, 1);
    const SDValue OpsB[] = { MemAddr, Align, Pred, Reg0, Chain };
    VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
  } else {
    // Two instructions, the second one reading the first one's partial
    // result (seeded with an IMPLICIT_DEF) as an extra input.
    SDValue ImplDef =
      SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = { MemAddr, Align, ImplDef, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
                                          dl, ResTys, OpsA);

    SDValue SuperReg = SDValue(VLdA, 0);
    Chain = SDValue(VLdA, 1);
    const SDValue OpsB[] = { MemAddr, Align, SuperReg, Pred, Reg0, Chain };
    VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
  }

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});

  // Extract the subregisters.
  if (NumVecs == 1) {
    ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
  } else {
    SDValue SuperReg = SDValue(VLdDup, 0);
    static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
    unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
    for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
      ReplaceUses(SDValue(N, Vec),
                  CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
    }
  }
  // Result NumVecs is the chain; NumVecs+1 (updating forms only) is the
  // written-back address.
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
  CurDAG->RemoveDeadNode(N);
}
2818 
/// Try to select N as a v6T2 bitfield extract (SBFX/UBFX), or as a single
/// shift when the extracted field reaches the top bit. Handles four source
/// patterns: and(srl X, c1), c2; srl(shl X, c1), c2; srl(and X, mask);
/// and sign_extend_inreg(srl/sra X, c1). Returns true if N was replaced.
bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
  // SBFX/UBFX were introduced in ARMv6T2.
  if (!Subtarget->hasV6T2Ops())
    return false;

  unsigned Opc = isSigned
    ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
    : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
  SDLoc dl(N);

  // For unsigned extracts, check for a shift right and mask
  unsigned And_imm = 0;
  if (N->getOpcode() == ISD::AND) {
    if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {

      // The immediate is a mask of the low bits iff imm & (imm+1) == 0
      if (And_imm & (And_imm + 1))
        return false;

      unsigned Srl_imm = 0;
      if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
                                Srl_imm)) {
        assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");

        // Mask off the unnecessary bits of the AND immediate; normally
        // DAGCombine will do this, but that might not happen if
        // targetShrinkDemandedConstant chooses a different immediate.
        And_imm &= -1U >> Srl_imm;

        // Note: The width operand is encoded as width-1.
        unsigned Width = countTrailingOnes(And_imm) - 1;
        unsigned LSB = Srl_imm;

        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

        if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
          // It's cheaper to use a right shift to extract the top bits.
          if (Subtarget->isThumb()) {
            Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
            SDValue Ops[] = { N->getOperand(0).getOperand(0),
                              CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                              getAL(CurDAG, dl), Reg0, Reg0 };
            CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
            return true;
          }

          // ARM models shift instructions as MOVsi with shifter operand.
          ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
          SDValue ShOpc =
            CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
                                      MVT::i32);
          SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
                            getAL(CurDAG, dl), Reg0, Reg0 };
          CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
          return true;
        }

        assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
        // UBFX/SBFX operands: source, lsb, width-1, predicate.
        SDValue Ops[] = { N->getOperand(0).getOperand(0),
                          CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                          CurDAG->getTargetConstant(Width, dl, MVT::i32),
                          getAL(CurDAG, dl), Reg0 };
        CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
        return true;
      }
    }
    return false;
  }

  // Otherwise, we're looking for a shift of a shift
  unsigned Shl_imm = 0;
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
    assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
    unsigned Srl_imm = 0;
    if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      // Note: The width operand is encoded as width-1.
      unsigned Width = 32 - Srl_imm - 1;
      int LSB = Srl_imm - Shl_imm;
      // A negative LSB means the field would start below bit 0.
      if (LSB < 0)
        return false;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  // Or we are looking for a shift of an and, with a mask operand
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
      isShiftedMask_32(And_imm)) {
    unsigned Srl_imm = 0;
    unsigned LSB = countTrailingZeros(And_imm);
    // Shift must be the same as the ands lsb
    if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      unsigned MSB = 31 - countLeadingZeros(And_imm);
      // Note: The width operand is encoded as width-1.
      unsigned Width = MSB - LSB;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  // Finally, sign_extend_inreg of a right shift: an SBFX/UBFX of the
  // extended type's width starting at the shift amount.
  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    unsigned LSB = 0;
    if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
        !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
      return false;

    // Field must fit in 32 bits.
    if (LSB + Width > 32)
      return false;

    SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
    assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
    SDValue Ops[] = { N->getOperand(0).getOperand(0),
                      CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                      CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
                      getAL(CurDAG, dl), Reg0 };
    CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
    return true;
  }

  return false;
}
2954 
2955 /// Target-specific DAG combining for ISD::XOR.
2956 /// Target-independent combining lowers SELECT_CC nodes of the form
2957 /// select_cc setg[ge] X,  0,  X, -X
2958 /// select_cc setgt    X, -1,  X, -X
2959 /// select_cc setl[te] X,  0, -X,  X
2960 /// select_cc setlt    X,  1, -X,  X
2961 /// which represent Integer ABS into:
2962 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2963 /// ARM instruction selection detects the latter and matches it to
2964 /// ARM::ABS or ARM::t2ABS machine node.
2965 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
2966   SDValue XORSrc0 = N->getOperand(0);
2967   SDValue XORSrc1 = N->getOperand(1);
2968   EVT VT = N->getValueType(0);
2969 
2970   if (Subtarget->isThumb1Only())
2971     return false;
2972 
2973   if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
2974     return false;
2975 
2976   SDValue ADDSrc0 = XORSrc0.getOperand(0);
2977   SDValue ADDSrc1 = XORSrc0.getOperand(1);
2978   SDValue SRASrc0 = XORSrc1.getOperand(0);
2979   SDValue SRASrc1 = XORSrc1.getOperand(1);
2980   ConstantSDNode *SRAConstant =  dyn_cast<ConstantSDNode>(SRASrc1);
2981   EVT XType = SRASrc0.getValueType();
2982   unsigned Size = XType.getSizeInBits() - 1;
2983 
2984   if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
2985       XType.isInteger() && SRAConstant != nullptr &&
2986       Size == SRAConstant->getZExtValue()) {
2987     unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
2988     CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
2989     return true;
2990   }
2991 
2992   return false;
2993 }
2994 
2995 /// We've got special pseudo-instructions for these
2996 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2997   unsigned Opcode;
2998   EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2999   if (MemTy == MVT::i8)
3000     Opcode = ARM::CMP_SWAP_8;
3001   else if (MemTy == MVT::i16)
3002     Opcode = ARM::CMP_SWAP_16;
3003   else if (MemTy == MVT::i32)
3004     Opcode = ARM::CMP_SWAP_32;
3005   else
3006     llvm_unreachable("Unknown AtomicCmpSwap type");
3007 
3008   SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
3009                    N->getOperand(0)};
3010   SDNode *CmpSwap = CurDAG->getMachineNode(
3011       Opcode, SDLoc(N),
3012       CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
3013 
3014   MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
3015   CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
3016 
3017   ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
3018   ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
3019   CurDAG->RemoveDeadNode(N);
3020 }
3021 
3022 static Optional<std::pair<unsigned, unsigned>>
3023 getContiguousRangeOfSetBits(const APInt &A) {
3024   unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
3025   unsigned LastOne = A.countTrailingZeros();
3026   if (A.countPopulation() != (FirstOne - LastOne + 1))
3027     return Optional<std::pair<unsigned,unsigned>>();
3028   return std::make_pair(FirstOne, LastOne);
3029 }
3030 
// Try to turn (cmpz (and X, C), #0), where C is a contiguous bit mask, into
// one or two flag-setting Thumb shifts of X, replacing the AND in place.
// When the mask is moved into the sign bit (case 3 below), the caller must
// rewrite an EQ/NE user of the flags into PL/MI; SwitchEQNEToPLMI reports
// that requirement.
void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
  assert(N->getOpcode() == ARMISD::CMPZ);
  SwitchEQNEToPLMI = false;

  if (!Subtarget->isThumb())
    // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
    // LSR don't exist as standalone instructions - they need the barrel shifter.
    return;

  // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
  SDValue And = N->getOperand(0);
  // Only fold if this CMPZ is the sole user of the AND; otherwise replacing
  // the AND with a shift would change the value seen by other users.
  if (!And->hasOneUse())
    return;

  // Require the comparison to be against the constant zero and the operand
  // to actually be an AND.
  SDValue Zero = N->getOperand(1);
  if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() ||
      And->getOpcode() != ISD::AND)
    return;
  SDValue X = And.getOperand(0);
  auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));

  if (!C)
    return;
  // The transform only works when the mask is one unbroken run of set bits;
  // Range is (MSB index, LSB index) of that run.
  auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
  if (!Range)
    return;

  // There are several ways to lower this:
  SDNode *NewN;
  SDLoc dl(N);

  // Emit a flag-setting immediate shift of Src by Imm. Opc is given as the
  // Thumb-1 opcode (tLSLri/tLSRri); in Thumb-2 mode it is mapped to the
  // equivalent t2 opcode, and the operand lists differ: the Thumb-1 forms
  // take CPSR as an explicit first operand, while the t2 forms carry the
  // usual predicate/cc_out trailing operands.
  auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
    if (Subtarget->isThumb2()) {
      Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
      SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    } else {
      SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
                       CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                       getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    }
  };

  if (Range->second == 0) {
    //  1. Mask includes the LSB -> Simply shift the top N bits off
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == 31) {
    //  2. Mask includes the MSB -> Simply shift the bottom N bits off
    NewN = EmitShift(ARM::tLSRri, X, Range->second);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == Range->second) {
    //  3. Only one bit is set. We can shift this into the sign bit and use a
    //     PL/MI comparison.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);

    // The tested bit now sits in the sign bit, so the caller must translate
    // EQ/NE on the old flags into PL/MI on the new ones.
    SwitchEQNEToPLMI = true;
  } else if (!Subtarget->hasV6T2Ops()) {
    //  4. Do a double shift to clear bottom and top bits, but only in
    //     thumb-1 mode as in thumb-2 we can use UBFX.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
                     Range->second + (31 - Range->first));
    ReplaceNode(And.getNode(), NewN);
  }

}
3102 
3103 void ARMDAGToDAGISel::Select(SDNode *N) {
3104   SDLoc dl(N);
3105 
3106   if (N->isMachineOpcode()) {
3107     N->setNodeId(-1);
3108     return;   // Already selected.
3109   }
3110 
3111   switch (N->getOpcode()) {
3112   default: break;
3113   case ISD::STORE: {
3114     // For Thumb1, match an sp-relative store in C++. This is a little
3115     // unfortunate, but I don't think I can make the chain check work
3116     // otherwise.  (The chain of the store has to be the same as the chain
3117     // of the CopyFromReg, or else we can't replace the CopyFromReg with
3118     // a direct reference to "SP".)
3119     //
3120     // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
3121     // a different addressing mode from other four-byte stores.
3122     //
3123     // This pattern usually comes up with call arguments.
3124     StoreSDNode *ST = cast<StoreSDNode>(N);
3125     SDValue Ptr = ST->getBasePtr();
3126     if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
3127       int RHSC = 0;
3128       if (Ptr.getOpcode() == ISD::ADD &&
3129           isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))
3130         Ptr = Ptr.getOperand(0);
3131 
3132       if (Ptr.getOpcode() == ISD::CopyFromReg &&
3133           cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
3134           Ptr.getOperand(0) == ST->getChain()) {
3135         SDValue Ops[] = {ST->getValue(),
3136                          CurDAG->getRegister(ARM::SP, MVT::i32),
3137                          CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
3138                          getAL(CurDAG, dl),
3139                          CurDAG->getRegister(0, MVT::i32),
3140                          ST->getChain()};
3141         MachineSDNode *ResNode =
3142             CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
3143         MachineMemOperand *MemOp = ST->getMemOperand();
3144         CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
3145         ReplaceNode(N, ResNode);
3146         return;
3147       }
3148     }
3149     break;
3150   }
3151   case ISD::WRITE_REGISTER:
3152     if (tryWriteRegister(N))
3153       return;
3154     break;
3155   case ISD::READ_REGISTER:
3156     if (tryReadRegister(N))
3157       return;
3158     break;
3159   case ISD::INLINEASM:
3160   case ISD::INLINEASM_BR:
3161     if (tryInlineAsm(N))
3162       return;
3163     break;
3164   case ISD::XOR:
3165     // Select special operations if XOR node forms integer ABS pattern
3166     if (tryABSOp(N))
3167       return;
3168     // Other cases are autogenerated.
3169     break;
3170   case ISD::Constant: {
3171     unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
3172     // If we can't materialize the constant we need to use a literal pool
3173     if (ConstantMaterializationCost(Val, Subtarget) > 2) {
3174       SDValue CPIdx = CurDAG->getTargetConstantPool(
3175           ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
3176           TLI->getPointerTy(CurDAG->getDataLayout()));
3177 
3178       SDNode *ResNode;
3179       if (Subtarget->isThumb()) {
3180         SDValue Ops[] = {
3181           CPIdx,
3182           getAL(CurDAG, dl),
3183           CurDAG->getRegister(0, MVT::i32),
3184           CurDAG->getEntryNode()
3185         };
3186         ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
3187                                          Ops);
3188       } else {
3189         SDValue Ops[] = {
3190           CPIdx,
3191           CurDAG->getTargetConstant(0, dl, MVT::i32),
3192           getAL(CurDAG, dl),
3193           CurDAG->getRegister(0, MVT::i32),
3194           CurDAG->getEntryNode()
3195         };
3196         ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
3197                                          Ops);
3198       }
3199       // Annotate the Node with memory operand information so that MachineInstr
3200       // queries work properly. This e.g. gives the register allocation the
3201       // required information for rematerialization.
3202       MachineFunction& MF = CurDAG->getMachineFunction();
3203       MachineMemOperand *MemOp =
3204           MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
3205                                   MachineMemOperand::MOLoad, 4, 4);
3206 
3207       CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
3208 
3209       ReplaceNode(N, ResNode);
3210       return;
3211     }
3212 
3213     // Other cases are autogenerated.
3214     break;
3215   }
3216   case ISD::FrameIndex: {
3217     // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
3218     int FI = cast<FrameIndexSDNode>(N)->getIndex();
3219     SDValue TFI = CurDAG->getTargetFrameIndex(
3220         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
3221     if (Subtarget->isThumb1Only()) {
3222       // Set the alignment of the frame object to 4, to avoid having to generate
3223       // more than one ADD
3224       MachineFrameInfo &MFI = MF->getFrameInfo();
3225       if (MFI.getObjectAlignment(FI) < 4)
3226         MFI.setObjectAlignment(FI, 4);
3227       CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
3228                            CurDAG->getTargetConstant(0, dl, MVT::i32));
3229       return;
3230     } else {
3231       unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
3232                       ARM::t2ADDri : ARM::ADDri);
3233       SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
3234                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3235                         CurDAG->getRegister(0, MVT::i32) };
3236       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3237       return;
3238     }
3239   }
3240   case ISD::SRL:
3241     if (tryV6T2BitfieldExtractOp(N, false))
3242       return;
3243     break;
3244   case ISD::SIGN_EXTEND_INREG:
3245   case ISD::SRA:
3246     if (tryV6T2BitfieldExtractOp(N, true))
3247       return;
3248     break;
3249   case ISD::MUL:
3250     if (Subtarget->isThumb1Only())
3251       break;
3252     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
3253       unsigned RHSV = C->getZExtValue();
3254       if (!RHSV) break;
3255       if (isPowerOf2_32(RHSV-1)) {  // 2^n+1?
3256         unsigned ShImm = Log2_32(RHSV-1);
3257         if (ShImm >= 32)
3258           break;
3259         SDValue V = N->getOperand(0);
3260         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
3261         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
3262         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3263         if (Subtarget->isThumb()) {
3264           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3265           CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
3266           return;
3267         } else {
3268           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3269                             Reg0 };
3270           CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
3271           return;
3272         }
3273       }
3274       if (isPowerOf2_32(RHSV+1)) {  // 2^n-1?
3275         unsigned ShImm = Log2_32(RHSV+1);
3276         if (ShImm >= 32)
3277           break;
3278         SDValue V = N->getOperand(0);
3279         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
3280         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
3281         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3282         if (Subtarget->isThumb()) {
3283           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3284           CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
3285           return;
3286         } else {
3287           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3288                             Reg0 };
3289           CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
3290           return;
3291         }
3292       }
3293     }
3294     break;
3295   case ISD::AND: {
3296     // Check for unsigned bitfield extract
3297     if (tryV6T2BitfieldExtractOp(N, false))
3298       return;
3299 
3300     // If an immediate is used in an AND node, it is possible that the immediate
3301     // can be more optimally materialized when negated. If this is the case we
3302     // can negate the immediate and use a BIC instead.
3303     auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
3304     if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
3305       uint32_t Imm = (uint32_t) N1C->getZExtValue();
3306 
3307       // In Thumb2 mode, an AND can take a 12-bit immediate. If this
3308       // immediate can be negated and fit in the immediate operand of
3309       // a t2BIC, don't do any manual transform here as this can be
3310       // handled by the generic ISel machinery.
3311       bool PreferImmediateEncoding =
3312         Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
3313       if (!PreferImmediateEncoding &&
3314           ConstantMaterializationCost(Imm, Subtarget) >
3315               ConstantMaterializationCost(~Imm, Subtarget)) {
3316         // The current immediate costs more to materialize than a negated
3317         // immediate, so negate the immediate and use a BIC.
3318         SDValue NewImm =
3319           CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
3320         // If the new constant didn't exist before, reposition it in the topological
3321         // ordering so it is just before N. Otherwise, don't touch its location.
3322         if (NewImm->getNodeId() == -1)
3323           CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
3324 
3325         if (!Subtarget->hasThumb2()) {
3326           SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
3327                            N->getOperand(0), NewImm, getAL(CurDAG, dl),
3328                            CurDAG->getRegister(0, MVT::i32)};
3329           ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
3330           return;
3331         } else {
3332           SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
3333                            CurDAG->getRegister(0, MVT::i32),
3334                            CurDAG->getRegister(0, MVT::i32)};
3335           ReplaceNode(N,
3336                       CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
3337           return;
3338         }
3339       }
3340     }
3341 
3342     // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
3343     // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
3344     // are entirely contributed by c2 and lower 16-bits are entirely contributed
3345     // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
3346     // Select it to: "movt x, ((c1 & 0xffff) >> 16)
3347     EVT VT = N->getValueType(0);
3348     if (VT != MVT::i32)
3349       break;
3350     unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
3351       ? ARM::t2MOVTi16
3352       : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
3353     if (!Opc)
3354       break;
3355     SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3356     N1C = dyn_cast<ConstantSDNode>(N1);
3357     if (!N1C)
3358       break;
3359     if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
3360       SDValue N2 = N0.getOperand(1);
3361       ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
3362       if (!N2C)
3363         break;
3364       unsigned N1CVal = N1C->getZExtValue();
3365       unsigned N2CVal = N2C->getZExtValue();
3366       if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
3367           (N1CVal & 0xffffU) == 0xffffU &&
3368           (N2CVal & 0xffffU) == 0x0U) {
3369         SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
3370                                                   dl, MVT::i32);
3371         SDValue Ops[] = { N0.getOperand(0), Imm16,
3372                           getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
3373         ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
3374         return;
3375       }
3376     }
3377 
3378     break;
3379   }
3380   case ARMISD::UMAAL: {
3381     unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
3382     SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
3383                       N->getOperand(2), N->getOperand(3),
3384                       getAL(CurDAG, dl),
3385                       CurDAG->getRegister(0, MVT::i32) };
3386     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
3387     return;
3388   }
3389   case ARMISD::UMLAL:{
3390     if (Subtarget->isThumb()) {
3391       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3392                         N->getOperand(3), getAL(CurDAG, dl),
3393                         CurDAG->getRegister(0, MVT::i32)};
3394       ReplaceNode(
3395           N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
3396       return;
3397     }else{
3398       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3399                         N->getOperand(3), getAL(CurDAG, dl),
3400                         CurDAG->getRegister(0, MVT::i32),
3401                         CurDAG->getRegister(0, MVT::i32) };
3402       ReplaceNode(N, CurDAG->getMachineNode(
3403                          Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
3404                          MVT::i32, MVT::i32, Ops));
3405       return;
3406     }
3407   }
3408   case ARMISD::SMLAL:{
3409     if (Subtarget->isThumb()) {
3410       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3411                         N->getOperand(3), getAL(CurDAG, dl),
3412                         CurDAG->getRegister(0, MVT::i32)};
3413       ReplaceNode(
3414           N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
3415       return;
3416     }else{
3417       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3418                         N->getOperand(3), getAL(CurDAG, dl),
3419                         CurDAG->getRegister(0, MVT::i32),
3420                         CurDAG->getRegister(0, MVT::i32) };
3421       ReplaceNode(N, CurDAG->getMachineNode(
3422                          Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
3423                          MVT::i32, MVT::i32, Ops));
3424       return;
3425     }
3426   }
3427   case ARMISD::SUBE: {
3428     if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
3429       break;
3430     // Look for a pattern to match SMMLS
3431     // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
3432     if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
3433         N->getOperand(2).getOpcode() != ARMISD::SUBC ||
3434         !SDValue(N, 1).use_empty())
3435       break;
3436 
3437     if (Subtarget->isThumb())
3438       assert(Subtarget->hasThumb2() &&
3439              "This pattern should not be generated for Thumb");
3440 
3441     SDValue SmulLoHi = N->getOperand(1);
3442     SDValue Subc = N->getOperand(2);
3443     auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));
3444 
3445     if (!Zero || Zero->getZExtValue() != 0 ||
3446         Subc.getOperand(1) != SmulLoHi.getValue(0) ||
3447         N->getOperand(1) != SmulLoHi.getValue(1) ||
3448         N->getOperand(2) != Subc.getValue(1))
3449       break;
3450 
3451     unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
3452     SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
3453                       N->getOperand(0), getAL(CurDAG, dl),
3454                       CurDAG->getRegister(0, MVT::i32) };
3455     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
3456     return;
3457   }
3458   case ISD::LOAD: {
3459     if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
3460       return;
3461     if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
3462       if (tryT2IndexedLoad(N))
3463         return;
3464     } else if (Subtarget->isThumb()) {
3465       if (tryT1IndexedLoad(N))
3466         return;
3467     } else if (tryARMIndexedLoad(N))
3468       return;
3469     // Other cases are autogenerated.
3470     break;
3471   }
3472   case ISD::MLOAD:
3473     if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
3474       return;
3475     // Other cases are autogenerated.
3476     break;
3477   case ARMISD::WLS:
3478   case ARMISD::LE: {
3479     SDValue Ops[] = { N->getOperand(1),
3480                       N->getOperand(2),
3481                       N->getOperand(0) };
3482     unsigned Opc = N->getOpcode() == ARMISD::WLS ?
3483       ARM::t2WhileLoopStart : ARM::t2LoopEnd;
3484     SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
3485     ReplaceUses(N, New);
3486     CurDAG->RemoveDeadNode(N);
3487     return;
3488   }
3489   case ARMISD::LOOP_DEC: {
3490     SDValue Ops[] = { N->getOperand(1),
3491                       N->getOperand(2),
3492                       N->getOperand(0) };
3493     SDNode *Dec =
3494       CurDAG->getMachineNode(ARM::t2LoopDec, dl,
3495                              CurDAG->getVTList(MVT::i32, MVT::Other), Ops);
3496     ReplaceUses(N, Dec);
3497     CurDAG->RemoveDeadNode(N);
3498     return;
3499   }
3500   case ARMISD::BRCOND: {
3501     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3502     // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3503     // Pattern complexity = 6  cost = 1  size = 0
3504 
3505     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3506     // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
3507     // Pattern complexity = 6  cost = 1  size = 0
3508 
3509     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3510     // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3511     // Pattern complexity = 6  cost = 1  size = 0
3512 
3513     unsigned Opc = Subtarget->isThumb() ?
3514       ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
3515     SDValue Chain = N->getOperand(0);
3516     SDValue N1 = N->getOperand(1);
3517     SDValue N2 = N->getOperand(2);
3518     SDValue N3 = N->getOperand(3);
3519     SDValue InFlag = N->getOperand(4);
3520     assert(N1.getOpcode() == ISD::BasicBlock);
3521     assert(N2.getOpcode() == ISD::Constant);
3522     assert(N3.getOpcode() == ISD::Register);
3523 
3524     unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
3525 
3526     if (InFlag.getOpcode() == ARMISD::CMPZ) {
3527       if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
3528         SDValue Int = InFlag.getOperand(0);
3529         uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();
3530 
3531         // Handle low-overhead loops.
3532         if (ID == Intrinsic::loop_decrement_reg) {
3533           SDValue Elements = Int.getOperand(2);
3534           SDValue Size = CurDAG->getTargetConstant(
3535             cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl,
3536                                  MVT::i32);
3537 
3538           SDValue Args[] = { Elements, Size, Int.getOperand(0) };
3539           SDNode *LoopDec =
3540             CurDAG->getMachineNode(ARM::t2LoopDec, dl,
3541                                    CurDAG->getVTList(MVT::i32, MVT::Other),
3542                                    Args);
3543           ReplaceUses(Int.getNode(), LoopDec);
3544 
3545           SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
3546           SDNode *LoopEnd =
3547             CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);
3548 
3549           ReplaceUses(N, LoopEnd);
3550           CurDAG->RemoveDeadNode(N);
3551           CurDAG->RemoveDeadNode(InFlag.getNode());
3552           CurDAG->RemoveDeadNode(Int.getNode());
3553           return;
3554         }
3555       }
3556 
3557       bool SwitchEQNEToPLMI;
3558       SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3559       InFlag = N->getOperand(4);
3560 
3561       if (SwitchEQNEToPLMI) {
3562         switch ((ARMCC::CondCodes)CC) {
3563         default: llvm_unreachable("CMPZ must be either NE or EQ!");
3564         case ARMCC::NE:
3565           CC = (unsigned)ARMCC::MI;
3566           break;
3567         case ARMCC::EQ:
3568           CC = (unsigned)ARMCC::PL;
3569           break;
3570         }
3571       }
3572     }
3573 
3574     SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
3575     SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
3576     SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
3577                                              MVT::Glue, Ops);
3578     Chain = SDValue(ResNode, 0);
3579     if (N->getNumValues() == 2) {
3580       InFlag = SDValue(ResNode, 1);
3581       ReplaceUses(SDValue(N, 1), InFlag);
3582     }
3583     ReplaceUses(SDValue(N, 0),
3584                 SDValue(Chain.getNode(), Chain.getResNo()));
3585     CurDAG->RemoveDeadNode(N);
3586     return;
3587   }
3588 
3589   case ARMISD::CMPZ: {
3590     // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
3591     //   This allows us to avoid materializing the expensive negative constant.
3592     //   The CMPZ #0 is useless and will be peepholed away but we need to keep it
3593     //   for its glue output.
3594     SDValue X = N->getOperand(0);
3595     auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
3596     if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
3597       int64_t Addend = -C->getSExtValue();
3598 
3599       SDNode *Add = nullptr;
3600       // ADDS can be better than CMN if the immediate fits in a
3601       // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
3602       // Outside that range we can just use a CMN which is 32-bit but has a
3603       // 12-bit immediate range.
3604       if (Addend < 1<<8) {
3605         if (Subtarget->isThumb2()) {
3606           SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3607                             getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3608                             CurDAG->getRegister(0, MVT::i32) };
3609           Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
3610         } else {
3611           unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
3612           SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
3613                            CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3614                            getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
3615           Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3616         }
3617       }
3618       if (Add) {
3619         SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
3620         CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
3621       }
3622     }
3623     // Other cases are autogenerated.
3624     break;
3625   }
3626 
3627   case ARMISD::CMOV: {
3628     SDValue InFlag = N->getOperand(4);
3629 
3630     if (InFlag.getOpcode() == ARMISD::CMPZ) {
3631       bool SwitchEQNEToPLMI;
3632       SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3633 
3634       if (SwitchEQNEToPLMI) {
3635         SDValue ARMcc = N->getOperand(2);
3636         ARMCC::CondCodes CC =
3637           (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
3638 
3639         switch (CC) {
3640         default: llvm_unreachable("CMPZ must be either NE or EQ!");
3641         case ARMCC::NE:
3642           CC = ARMCC::MI;
3643           break;
3644         case ARMCC::EQ:
3645           CC = ARMCC::PL;
3646           break;
3647         }
3648         SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
3649         SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
3650                          N->getOperand(3), N->getOperand(4)};
3651         CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
3652       }
3653 
3654     }
3655     // Other cases are autogenerated.
3656     break;
3657   }
3658 
3659   case ARMISD::VZIP: {
3660     unsigned Opc = 0;
3661     EVT VT = N->getValueType(0);
3662     switch (VT.getSimpleVT().SimpleTy) {
3663     default: return;
3664     case MVT::v8i8:  Opc = ARM::VZIPd8; break;
3665     case MVT::v4f16:
3666     case MVT::v4i16: Opc = ARM::VZIPd16; break;
3667     case MVT::v2f32:
3668     // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3669     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3670     case MVT::v16i8: Opc = ARM::VZIPq8; break;
3671     case MVT::v8f16:
3672     case MVT::v8i16: Opc = ARM::VZIPq16; break;
3673     case MVT::v4f32:
3674     case MVT::v4i32: Opc = ARM::VZIPq32; break;
3675     }
3676     SDValue Pred = getAL(CurDAG, dl);
3677     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3678     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3679     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3680     return;
3681   }
3682   case ARMISD::VUZP: {
3683     unsigned Opc = 0;
3684     EVT VT = N->getValueType(0);
3685     switch (VT.getSimpleVT().SimpleTy) {
3686     default: return;
3687     case MVT::v8i8:  Opc = ARM::VUZPd8; break;
3688     case MVT::v4f16:
3689     case MVT::v4i16: Opc = ARM::VUZPd16; break;
3690     case MVT::v2f32:
3691     // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3692     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3693     case MVT::v16i8: Opc = ARM::VUZPq8; break;
3694     case MVT::v8f16:
3695     case MVT::v8i16: Opc = ARM::VUZPq16; break;
3696     case MVT::v4f32:
3697     case MVT::v4i32: Opc = ARM::VUZPq32; break;
3698     }
3699     SDValue Pred = getAL(CurDAG, dl);
3700     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3701     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3702     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3703     return;
3704   }
3705   case ARMISD::VTRN: {
3706     unsigned Opc = 0;
3707     EVT VT = N->getValueType(0);
3708     switch (VT.getSimpleVT().SimpleTy) {
3709     default: return;
3710     case MVT::v8i8:  Opc = ARM::VTRNd8; break;
3711     case MVT::v4f16:
3712     case MVT::v4i16: Opc = ARM::VTRNd16; break;
3713     case MVT::v2f32:
3714     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3715     case MVT::v16i8: Opc = ARM::VTRNq8; break;
3716     case MVT::v8f16:
3717     case MVT::v8i16: Opc = ARM::VTRNq16; break;
3718     case MVT::v4f32:
3719     case MVT::v4i32: Opc = ARM::VTRNq32; break;
3720     }
3721     SDValue Pred = getAL(CurDAG, dl);
3722     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3723     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3724     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3725     return;
3726   }
3727   case ARMISD::BUILD_VECTOR: {
3728     EVT VecVT = N->getValueType(0);
3729     EVT EltVT = VecVT.getVectorElementType();
3730     unsigned NumElts = VecVT.getVectorNumElements();
3731     if (EltVT == MVT::f64) {
3732       assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
3733       ReplaceNode(
3734           N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3735       return;
3736     }
3737     assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
3738     if (NumElts == 2) {
3739       ReplaceNode(
3740           N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3741       return;
3742     }
3743     assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
3744     ReplaceNode(N,
3745                 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
3746                                     N->getOperand(2), N->getOperand(3)));
3747     return;
3748   }
3749 
3750   case ARMISD::VLD1DUP: {
3751     static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
3752                                          ARM::VLD1DUPd32 };
3753     static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
3754                                          ARM::VLD1DUPq32 };
3755     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
3756     return;
3757   }
3758 
3759   case ARMISD::VLD2DUP: {
3760     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3761                                         ARM::VLD2DUPd32 };
3762     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
3763     return;
3764   }
3765 
3766   case ARMISD::VLD3DUP: {
3767     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
3768                                         ARM::VLD3DUPd16Pseudo,
3769                                         ARM::VLD3DUPd32Pseudo };
3770     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
3771     return;
3772   }
3773 
3774   case ARMISD::VLD4DUP: {
3775     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
3776                                         ARM::VLD4DUPd16Pseudo,
3777                                         ARM::VLD4DUPd32Pseudo };
3778     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
3779     return;
3780   }
3781 
3782   case ARMISD::VLD1DUP_UPD: {
3783     static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
3784                                          ARM::VLD1DUPd16wb_fixed,
3785                                          ARM::VLD1DUPd32wb_fixed };
3786     static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
3787                                          ARM::VLD1DUPq16wb_fixed,
3788                                          ARM::VLD1DUPq32wb_fixed };
3789     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
3790     return;
3791   }
3792 
3793   case ARMISD::VLD2DUP_UPD: {
3794     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
3795                                         ARM::VLD2DUPd16wb_fixed,
3796                                         ARM::VLD2DUPd32wb_fixed };
3797     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, Opcodes);
3798     return;
3799   }
3800 
3801   case ARMISD::VLD3DUP_UPD: {
3802     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
3803                                         ARM::VLD3DUPd16Pseudo_UPD,
3804                                         ARM::VLD3DUPd32Pseudo_UPD };
3805     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, Opcodes);
3806     return;
3807   }
3808 
3809   case ARMISD::VLD4DUP_UPD: {
3810     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
3811                                         ARM::VLD4DUPd16Pseudo_UPD,
3812                                         ARM::VLD4DUPd32Pseudo_UPD };
3813     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, Opcodes);
3814     return;
3815   }
3816 
3817   case ARMISD::VLD1_UPD: {
3818     static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
3819                                          ARM::VLD1d16wb_fixed,
3820                                          ARM::VLD1d32wb_fixed,
3821                                          ARM::VLD1d64wb_fixed };
3822     static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
3823                                          ARM::VLD1q16wb_fixed,
3824                                          ARM::VLD1q32wb_fixed,
3825                                          ARM::VLD1q64wb_fixed };
3826     SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
3827     return;
3828   }
3829 
3830   case ARMISD::VLD2_UPD: {
3831     static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
3832                                          ARM::VLD2d16wb_fixed,
3833                                          ARM::VLD2d32wb_fixed,
3834                                          ARM::VLD1q64wb_fixed};
3835     static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
3836                                          ARM::VLD2q16PseudoWB_fixed,
3837                                          ARM::VLD2q32PseudoWB_fixed };
3838     SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
3839     return;
3840   }
3841 
3842   case ARMISD::VLD3_UPD: {
3843     static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
3844                                          ARM::VLD3d16Pseudo_UPD,
3845                                          ARM::VLD3d32Pseudo_UPD,
3846                                          ARM::VLD1d64TPseudoWB_fixed};
3847     static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3848                                           ARM::VLD3q16Pseudo_UPD,
3849                                           ARM::VLD3q32Pseudo_UPD };
3850     static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
3851                                           ARM::VLD3q16oddPseudo_UPD,
3852                                           ARM::VLD3q32oddPseudo_UPD };
3853     SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3854     return;
3855   }
3856 
3857   case ARMISD::VLD4_UPD: {
3858     static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
3859                                          ARM::VLD4d16Pseudo_UPD,
3860                                          ARM::VLD4d32Pseudo_UPD,
3861                                          ARM::VLD1d64QPseudoWB_fixed};
3862     static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3863                                           ARM::VLD4q16Pseudo_UPD,
3864                                           ARM::VLD4q32Pseudo_UPD };
3865     static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
3866                                           ARM::VLD4q16oddPseudo_UPD,
3867                                           ARM::VLD4q32oddPseudo_UPD };
3868     SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3869     return;
3870   }
3871 
3872   case ARMISD::VLD2LN_UPD: {
3873     static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
3874                                          ARM::VLD2LNd16Pseudo_UPD,
3875                                          ARM::VLD2LNd32Pseudo_UPD };
3876     static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
3877                                          ARM::VLD2LNq32Pseudo_UPD };
3878     SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
3879     return;
3880   }
3881 
3882   case ARMISD::VLD3LN_UPD: {
3883     static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
3884                                          ARM::VLD3LNd16Pseudo_UPD,
3885                                          ARM::VLD3LNd32Pseudo_UPD };
3886     static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
3887                                          ARM::VLD3LNq32Pseudo_UPD };
3888     SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
3889     return;
3890   }
3891 
3892   case ARMISD::VLD4LN_UPD: {
3893     static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
3894                                          ARM::VLD4LNd16Pseudo_UPD,
3895                                          ARM::VLD4LNd32Pseudo_UPD };
3896     static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
3897                                          ARM::VLD4LNq32Pseudo_UPD };
3898     SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
3899     return;
3900   }
3901 
3902   case ARMISD::VST1_UPD: {
3903     static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
3904                                          ARM::VST1d16wb_fixed,
3905                                          ARM::VST1d32wb_fixed,
3906                                          ARM::VST1d64wb_fixed };
3907     static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
3908                                          ARM::VST1q16wb_fixed,
3909                                          ARM::VST1q32wb_fixed,
3910                                          ARM::VST1q64wb_fixed };
3911     SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
3912     return;
3913   }
3914 
3915   case ARMISD::VST2_UPD: {
3916     static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
3917                                          ARM::VST2d16wb_fixed,
3918                                          ARM::VST2d32wb_fixed,
3919                                          ARM::VST1q64wb_fixed};
3920     static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
3921                                          ARM::VST2q16PseudoWB_fixed,
3922                                          ARM::VST2q32PseudoWB_fixed };
3923     SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
3924     return;
3925   }
3926 
3927   case ARMISD::VST3_UPD: {
3928     static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
3929                                          ARM::VST3d16Pseudo_UPD,
3930                                          ARM::VST3d32Pseudo_UPD,
3931                                          ARM::VST1d64TPseudoWB_fixed};
3932     static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3933                                           ARM::VST3q16Pseudo_UPD,
3934                                           ARM::VST3q32Pseudo_UPD };
3935     static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
3936                                           ARM::VST3q16oddPseudo_UPD,
3937                                           ARM::VST3q32oddPseudo_UPD };
3938     SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3939     return;
3940   }
3941 
3942   case ARMISD::VST4_UPD: {
3943     static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
3944                                          ARM::VST4d16Pseudo_UPD,
3945                                          ARM::VST4d32Pseudo_UPD,
3946                                          ARM::VST1d64QPseudoWB_fixed};
3947     static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3948                                           ARM::VST4q16Pseudo_UPD,
3949                                           ARM::VST4q32Pseudo_UPD };
3950     static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
3951                                           ARM::VST4q16oddPseudo_UPD,
3952                                           ARM::VST4q32oddPseudo_UPD };
3953     SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3954     return;
3955   }
3956 
3957   case ARMISD::VST2LN_UPD: {
3958     static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
3959                                          ARM::VST2LNd16Pseudo_UPD,
3960                                          ARM::VST2LNd32Pseudo_UPD };
3961     static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
3962                                          ARM::VST2LNq32Pseudo_UPD };
3963     SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
3964     return;
3965   }
3966 
3967   case ARMISD::VST3LN_UPD: {
3968     static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
3969                                          ARM::VST3LNd16Pseudo_UPD,
3970                                          ARM::VST3LNd32Pseudo_UPD };
3971     static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
3972                                          ARM::VST3LNq32Pseudo_UPD };
3973     SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
3974     return;
3975   }
3976 
3977   case ARMISD::VST4LN_UPD: {
3978     static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
3979                                          ARM::VST4LNd16Pseudo_UPD,
3980                                          ARM::VST4LNd32Pseudo_UPD };
3981     static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
3982                                          ARM::VST4LNq32Pseudo_UPD };
3983     SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
3984     return;
3985   }
3986 
3987   case ISD::INTRINSIC_VOID:
3988   case ISD::INTRINSIC_W_CHAIN: {
3989     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
3990     switch (IntNo) {
3991     default:
3992       break;
3993 
3994     case Intrinsic::arm_mrrc:
3995     case Intrinsic::arm_mrrc2: {
3996       SDLoc dl(N);
3997       SDValue Chain = N->getOperand(0);
3998       unsigned Opc;
3999 
4000       if (Subtarget->isThumb())
4001         Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
4002       else
4003         Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
4004 
4005       SmallVector<SDValue, 5> Ops;
4006       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
4007       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
4008       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */
4009 
      // The MRRC2 instruction does not allow predicates: the top 4 bits of the
      // encoding are always '1111'. Assembly language permits writing AL as a
      // predicate on mrrc2, but it makes no difference to the encoded instruction.
4013       if (Opc != ARM::MRRC2) {
4014         Ops.push_back(getAL(CurDAG, dl));
4015         Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4016       }
4017 
4018       Ops.push_back(Chain);
4019 
4020       // Writes to two registers.
4021       const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
4022 
4023       ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
4024       return;
4025     }
4026     case Intrinsic::arm_ldaexd:
4027     case Intrinsic::arm_ldrexd: {
4028       SDLoc dl(N);
4029       SDValue Chain = N->getOperand(0);
4030       SDValue MemAddr = N->getOperand(2);
4031       bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
4032 
4033       bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
4034       unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
4035                                 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
4036 
      // arm_ldrexd returns an i64 value in {i32, i32}.
4038       std::vector<EVT> ResTys;
4039       if (isThumb) {
4040         ResTys.push_back(MVT::i32);
4041         ResTys.push_back(MVT::i32);
4042       } else
4043         ResTys.push_back(MVT::Untyped);
4044       ResTys.push_back(MVT::Other);
4045 
4046       // Place arguments in the right order.
4047       SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
4048                        CurDAG->getRegister(0, MVT::i32), Chain};
4049       SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
4050       // Transfer memoperands.
4051       MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
4052       CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4053 
4054       // Remap uses.
4055       SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
4056       if (!SDValue(N, 0).use_empty()) {
4057         SDValue Result;
4058         if (isThumb)
4059           Result = SDValue(Ld, 0);
4060         else {
4061           SDValue SubRegIdx =
4062             CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
4063           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
4064               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
4065           Result = SDValue(ResNode,0);
4066         }
4067         ReplaceUses(SDValue(N, 0), Result);
4068       }
4069       if (!SDValue(N, 1).use_empty()) {
4070         SDValue Result;
4071         if (isThumb)
4072           Result = SDValue(Ld, 1);
4073         else {
4074           SDValue SubRegIdx =
4075             CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
4076           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
4077               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
4078           Result = SDValue(ResNode,0);
4079         }
4080         ReplaceUses(SDValue(N, 1), Result);
4081       }
4082       ReplaceUses(SDValue(N, 2), OutChain);
4083       CurDAG->RemoveDeadNode(N);
4084       return;
4085     }
4086     case Intrinsic::arm_stlexd:
4087     case Intrinsic::arm_strexd: {
4088       SDLoc dl(N);
4089       SDValue Chain = N->getOperand(0);
4090       SDValue Val0 = N->getOperand(2);
4091       SDValue Val1 = N->getOperand(3);
4092       SDValue MemAddr = N->getOperand(4);
4093 
      // Store-exclusive double returns an i32 value which is the return status
      // of the issued store.
4096       const EVT ResTys[] = {MVT::i32, MVT::Other};
4097 
4098       bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
4099       // Place arguments in the right order.
4100       SmallVector<SDValue, 7> Ops;
4101       if (isThumb) {
4102         Ops.push_back(Val0);
4103         Ops.push_back(Val1);
4104       } else
4105         // arm_strexd uses GPRPair.
4106         Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
4107       Ops.push_back(MemAddr);
4108       Ops.push_back(getAL(CurDAG, dl));
4109       Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4110       Ops.push_back(Chain);
4111 
4112       bool IsRelease = IntNo == Intrinsic::arm_stlexd;
4113       unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
4114                                 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
4115 
4116       SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
4117       // Transfer memoperands.
4118       MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
4119       CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4120 
4121       ReplaceNode(N, St);
4122       return;
4123     }
4124 
4125     case Intrinsic::arm_neon_vld1: {
4126       static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
4127                                            ARM::VLD1d32, ARM::VLD1d64 };
4128       static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
4129                                            ARM::VLD1q32, ARM::VLD1q64};
4130       SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
4131       return;
4132     }
4133 
4134     case Intrinsic::arm_neon_vld1x2: {
4135       static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
4136                                            ARM::VLD1q32, ARM::VLD1q64 };
4137       static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
4138                                            ARM::VLD1d16QPseudo,
4139                                            ARM::VLD1d32QPseudo,
4140                                            ARM::VLD1d64QPseudo };
4141       SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
4142       return;
4143     }
4144 
4145     case Intrinsic::arm_neon_vld1x3: {
4146       static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
4147                                            ARM::VLD1d16TPseudo,
4148                                            ARM::VLD1d32TPseudo,
4149                                            ARM::VLD1d64TPseudo };
4150       static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
4151                                             ARM::VLD1q16LowTPseudo_UPD,
4152                                             ARM::VLD1q32LowTPseudo_UPD,
4153                                             ARM::VLD1q64LowTPseudo_UPD };
4154       static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
4155                                             ARM::VLD1q16HighTPseudo,
4156                                             ARM::VLD1q32HighTPseudo,
4157                                             ARM::VLD1q64HighTPseudo };
4158       SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
4159       return;
4160     }
4161 
4162     case Intrinsic::arm_neon_vld1x4: {
4163       static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
4164                                            ARM::VLD1d16QPseudo,
4165                                            ARM::VLD1d32QPseudo,
4166                                            ARM::VLD1d64QPseudo };
4167       static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
4168                                             ARM::VLD1q16LowQPseudo_UPD,
4169                                             ARM::VLD1q32LowQPseudo_UPD,
4170                                             ARM::VLD1q64LowQPseudo_UPD };
4171       static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
4172                                             ARM::VLD1q16HighQPseudo,
4173                                             ARM::VLD1q32HighQPseudo,
4174                                             ARM::VLD1q64HighQPseudo };
4175       SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
4176       return;
4177     }
4178 
4179     case Intrinsic::arm_neon_vld2: {
4180       static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
4181                                            ARM::VLD2d32, ARM::VLD1q64 };
4182       static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
4183                                            ARM::VLD2q32Pseudo };
4184       SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
4185       return;
4186     }
4187 
4188     case Intrinsic::arm_neon_vld3: {
4189       static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
4190                                            ARM::VLD3d16Pseudo,
4191                                            ARM::VLD3d32Pseudo,
4192                                            ARM::VLD1d64TPseudo };
4193       static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
4194                                             ARM::VLD3q16Pseudo_UPD,
4195                                             ARM::VLD3q32Pseudo_UPD };
4196       static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
4197                                             ARM::VLD3q16oddPseudo,
4198                                             ARM::VLD3q32oddPseudo };
4199       SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
4200       return;
4201     }
4202 
4203     case Intrinsic::arm_neon_vld4: {
4204       static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
4205                                            ARM::VLD4d16Pseudo,
4206                                            ARM::VLD4d32Pseudo,
4207                                            ARM::VLD1d64QPseudo };
4208       static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
4209                                             ARM::VLD4q16Pseudo_UPD,
4210                                             ARM::VLD4q32Pseudo_UPD };
4211       static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
4212                                             ARM::VLD4q16oddPseudo,
4213                                             ARM::VLD4q32oddPseudo };
4214       SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
4215       return;
4216     }
4217 
4218     case Intrinsic::arm_neon_vld2dup: {
4219       static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
4220                                            ARM::VLD2DUPd32, ARM::VLD1q64 };
4221       static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
4222                                             ARM::VLD2DUPq16EvenPseudo,
4223                                             ARM::VLD2DUPq32EvenPseudo };
4224       static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
4225                                             ARM::VLD2DUPq16OddPseudo,
4226                                             ARM::VLD2DUPq32OddPseudo };
4227       SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
4228                    DOpcodes, QOpcodes0, QOpcodes1);
4229       return;
4230     }
4231 
4232     case Intrinsic::arm_neon_vld3dup: {
4233       static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
4234                                            ARM::VLD3DUPd16Pseudo,
4235                                            ARM::VLD3DUPd32Pseudo,
4236                                            ARM::VLD1d64TPseudo };
4237       static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
4238                                             ARM::VLD3DUPq16EvenPseudo,
4239                                             ARM::VLD3DUPq32EvenPseudo };
4240       static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
4241                                             ARM::VLD3DUPq16OddPseudo,
4242                                             ARM::VLD3DUPq32OddPseudo };
4243       SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
4244                    DOpcodes, QOpcodes0, QOpcodes1);
4245       return;
4246     }
4247 
4248     case Intrinsic::arm_neon_vld4dup: {
4249       static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
4250                                            ARM::VLD4DUPd16Pseudo,
4251                                            ARM::VLD4DUPd32Pseudo,
4252                                            ARM::VLD1d64QPseudo };
4253       static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
4254                                             ARM::VLD4DUPq16EvenPseudo,
4255                                             ARM::VLD4DUPq32EvenPseudo };
4256       static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
4257                                             ARM::VLD4DUPq16OddPseudo,
4258                                             ARM::VLD4DUPq32OddPseudo };
4259       SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
4260                    DOpcodes, QOpcodes0, QOpcodes1);
4261       return;
4262     }
4263 
4264     case Intrinsic::arm_neon_vld2lane: {
4265       static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
4266                                            ARM::VLD2LNd16Pseudo,
4267                                            ARM::VLD2LNd32Pseudo };
4268       static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
4269                                            ARM::VLD2LNq32Pseudo };
4270       SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
4271       return;
4272     }
4273 
4274     case Intrinsic::arm_neon_vld3lane: {
4275       static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
4276                                            ARM::VLD3LNd16Pseudo,
4277                                            ARM::VLD3LNd32Pseudo };
4278       static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
4279                                            ARM::VLD3LNq32Pseudo };
4280       SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
4281       return;
4282     }
4283 
4284     case Intrinsic::arm_neon_vld4lane: {
4285       static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
4286                                            ARM::VLD4LNd16Pseudo,
4287                                            ARM::VLD4LNd32Pseudo };
4288       static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
4289                                            ARM::VLD4LNq32Pseudo };
4290       SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
4291       return;
4292     }
4293 
4294     case Intrinsic::arm_neon_vst1: {
4295       static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
4296                                            ARM::VST1d32, ARM::VST1d64 };
4297       static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
4298                                            ARM::VST1q32, ARM::VST1q64 };
4299       SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
4300       return;
4301     }
4302 
4303     case Intrinsic::arm_neon_vst1x2: {
4304       static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
4305                                            ARM::VST1q32, ARM::VST1q64 };
4306       static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
4307                                            ARM::VST1d16QPseudo,
4308                                            ARM::VST1d32QPseudo,
4309                                            ARM::VST1d64QPseudo };
4310       SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
4311       return;
4312     }
4313 
4314     case Intrinsic::arm_neon_vst1x3: {
4315       static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
4316                                            ARM::VST1d16TPseudo,
4317                                            ARM::VST1d32TPseudo,
4318                                            ARM::VST1d64TPseudo };
4319       static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
4320                                             ARM::VST1q16LowTPseudo_UPD,
4321                                             ARM::VST1q32LowTPseudo_UPD,
4322                                             ARM::VST1q64LowTPseudo_UPD };
4323       static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
4324                                             ARM::VST1q16HighTPseudo,
4325                                             ARM::VST1q32HighTPseudo,
4326                                             ARM::VST1q64HighTPseudo };
4327       SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
4328       return;
4329     }
4330 
4331     case Intrinsic::arm_neon_vst1x4: {
4332       static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
4333                                            ARM::VST1d16QPseudo,
4334                                            ARM::VST1d32QPseudo,
4335                                            ARM::VST1d64QPseudo };
4336       static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
4337                                             ARM::VST1q16LowQPseudo_UPD,
4338                                             ARM::VST1q32LowQPseudo_UPD,
4339                                             ARM::VST1q64LowQPseudo_UPD };
4340       static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
4341                                             ARM::VST1q16HighQPseudo,
4342                                             ARM::VST1q32HighQPseudo,
4343                                             ARM::VST1q64HighQPseudo };
4344       SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
4345       return;
4346     }
4347 
4348     case Intrinsic::arm_neon_vst2: {
4349       static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
4350                                            ARM::VST2d32, ARM::VST1q64 };
4351       static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
4352                                            ARM::VST2q32Pseudo };
4353       SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
4354       return;
4355     }
4356 
4357     case Intrinsic::arm_neon_vst3: {
4358       static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
4359                                            ARM::VST3d16Pseudo,
4360                                            ARM::VST3d32Pseudo,
4361                                            ARM::VST1d64TPseudo };
4362       static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
4363                                             ARM::VST3q16Pseudo_UPD,
4364                                             ARM::VST3q32Pseudo_UPD };
4365       static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
4366                                             ARM::VST3q16oddPseudo,
4367                                             ARM::VST3q32oddPseudo };
4368       SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
4369       return;
4370     }
4371 
4372     case Intrinsic::arm_neon_vst4: {
4373       static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
4374                                            ARM::VST4d16Pseudo,
4375                                            ARM::VST4d32Pseudo,
4376                                            ARM::VST1d64QPseudo };
4377       static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
4378                                             ARM::VST4q16Pseudo_UPD,
4379                                             ARM::VST4q32Pseudo_UPD };
4380       static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
4381                                             ARM::VST4q16oddPseudo,
4382                                             ARM::VST4q32oddPseudo };
4383       SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
4384       return;
4385     }
4386 
4387     case Intrinsic::arm_neon_vst2lane: {
4388       static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
4389                                            ARM::VST2LNd16Pseudo,
4390                                            ARM::VST2LNd32Pseudo };
4391       static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
4392                                            ARM::VST2LNq32Pseudo };
4393       SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
4394       return;
4395     }
4396 
4397     case Intrinsic::arm_neon_vst3lane: {
4398       static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
4399                                            ARM::VST3LNd16Pseudo,
4400                                            ARM::VST3LNd32Pseudo };
4401       static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
4402                                            ARM::VST3LNq32Pseudo };
4403       SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
4404       return;
4405     }
4406 
4407     case Intrinsic::arm_neon_vst4lane: {
4408       static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
4409                                            ARM::VST4LNd16Pseudo,
4410                                            ARM::VST4LNd32Pseudo };
4411       static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
4412                                            ARM::VST4LNq32Pseudo };
4413       SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
4414       return;
4415     }
4416 
4417     case Intrinsic::arm_mve_vldr_gather_base_wb:
4418     case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
4419       static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre,
4420                                          ARM::MVE_VLDRDU64_qi_pre};
4421       SelectMVE_WB(N, Opcodes,
4422                    IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated);
4423       return;
4424     }
4425 
4426     case Intrinsic::arm_mve_vld2q: {
4427       static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8};
4428       static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
4429                                            ARM::MVE_VLD21_16};
4430       static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
4431                                            ARM::MVE_VLD21_32};
4432       static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
4433       SelectMVE_VLD(N, 2, Opcodes);
4434       return;
4435     }
4436 
4437     case Intrinsic::arm_mve_vld4q: {
4438       static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
4439                                           ARM::MVE_VLD42_8, ARM::MVE_VLD43_8};
4440       static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
4441                                            ARM::MVE_VLD42_16,
4442                                            ARM::MVE_VLD43_16};
4443       static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
4444                                            ARM::MVE_VLD42_32,
4445                                            ARM::MVE_VLD43_32};
4446       static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
4447       SelectMVE_VLD(N, 4, Opcodes);
4448       return;
4449     }
4450     }
4451     break;
4452   }
4453 
4454   case ISD::INTRINSIC_WO_CHAIN: {
4455     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
4456     switch (IntNo) {
4457     default:
4458       break;
4459 
4460     case Intrinsic::arm_mve_urshrl:
4461       SelectMVE_LongShift(N, ARM::MVE_URSHRL, true, false);
4462       return;
4463     case Intrinsic::arm_mve_uqshll:
4464       SelectMVE_LongShift(N, ARM::MVE_UQSHLL, true, false);
4465       return;
4466     case Intrinsic::arm_mve_srshrl:
4467       SelectMVE_LongShift(N, ARM::MVE_SRSHRL, true, false);
4468       return;
4469     case Intrinsic::arm_mve_sqshll:
4470       SelectMVE_LongShift(N, ARM::MVE_SQSHLL, true, false);
4471       return;
4472     case Intrinsic::arm_mve_uqrshll:
4473       SelectMVE_LongShift(N, ARM::MVE_UQRSHLL, false, true);
4474       return;
4475     case Intrinsic::arm_mve_sqrshrl:
4476       SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true);
4477       return;
4478     case Intrinsic::arm_mve_lsll:
4479       SelectMVE_LongShift(N, ARM::MVE_LSLLr, false, false);
4480       return;
4481     case Intrinsic::arm_mve_asrl:
4482       SelectMVE_LongShift(N, ARM::MVE_ASRLr, false, false);
4483       return;
4484 
4485     case Intrinsic::arm_mve_vadc:
4486     case Intrinsic::arm_mve_vadc_predicated:
4487       SelectMVE_VADCSBC(N, ARM::MVE_VADC, ARM::MVE_VADCI, true,
4488                         IntNo == Intrinsic::arm_mve_vadc_predicated);
4489       return;
4490 
4491     case Intrinsic::arm_mve_vmlldava:
4492     case Intrinsic::arm_mve_vmlldava_predicated: {
4493       static const uint16_t OpcodesU[] = {
4494           ARM::MVE_VMLALDAVu16,   ARM::MVE_VMLALDAVu32,
4495           ARM::MVE_VMLALDAVau16,  ARM::MVE_VMLALDAVau32,
4496       };
4497       static const uint16_t OpcodesS[] = {
4498           ARM::MVE_VMLALDAVs16,   ARM::MVE_VMLALDAVs32,
4499           ARM::MVE_VMLALDAVas16,  ARM::MVE_VMLALDAVas32,
4500           ARM::MVE_VMLALDAVxs16,  ARM::MVE_VMLALDAVxs32,
4501           ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32,
4502           ARM::MVE_VMLSLDAVs16,   ARM::MVE_VMLSLDAVs32,
4503           ARM::MVE_VMLSLDAVas16,  ARM::MVE_VMLSLDAVas32,
4504           ARM::MVE_VMLSLDAVxs16,  ARM::MVE_VMLSLDAVxs32,
4505           ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32,
4506       };
4507       SelectMVE_VMLLDAV(N, IntNo == Intrinsic::arm_mve_vmlldava_predicated,
4508                         OpcodesS, OpcodesU);
4509       return;
4510     }
4511 
4512     case Intrinsic::arm_mve_vrmlldavha:
4513     case Intrinsic::arm_mve_vrmlldavha_predicated: {
4514       static const uint16_t OpcodesU[] = {
4515           ARM::MVE_VRMLALDAVHu32,  ARM::MVE_VRMLALDAVHau32,
4516       };
4517       static const uint16_t OpcodesS[] = {
4518           ARM::MVE_VRMLALDAVHs32,  ARM::MVE_VRMLALDAVHas32,
4519           ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32,
4520           ARM::MVE_VRMLSLDAVHs32,  ARM::MVE_VRMLSLDAVHas32,
4521           ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32,
4522       };
4523       SelectMVE_VRMLLDAVH(N, IntNo == Intrinsic::arm_mve_vrmlldavha_predicated,
4524                           OpcodesS, OpcodesU);
4525       return;
4526     }
4527     }
4528     break;
4529   }
4530 
4531   case ISD::ATOMIC_CMP_SWAP:
4532     SelectCMP_SWAP(N);
4533     return;
4534   }
4535 
4536   SelectCode(N);
4537 }
4538 
4539 // Inspect a register string of the form
4540 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
4541 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
4542 // and obtain the integer operands from them, adding these operands to the
4543 // provided vector.
4544 static void getIntOperandsFromRegisterString(StringRef RegString,
4545                                              SelectionDAG *CurDAG,
4546                                              const SDLoc &DL,
4547                                              std::vector<SDValue> &Ops) {
4548   SmallVector<StringRef, 5> Fields;
4549   RegString.split(Fields, ':');
4550 
4551   if (Fields.size() > 1) {
4552     bool AllIntFields = true;
4553 
4554     for (StringRef Field : Fields) {
4555       // Need to trim out leading 'cp' characters and get the integer field.
4556       unsigned IntField;
4557       AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
4558       Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
4559     }
4560 
4561     assert(AllIntFields &&
4562             "Unexpected non-integer value in special register string.");
4563   }
4564 }
4565 
4566 // Maps a Banked Register string to its mask value. The mask value returned is
4567 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
4568 // mask operand, which expresses which register is to be used, e.g. r8, and in
4569 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string
4570 // was invalid.
4571 static inline int getBankedRegisterMask(StringRef RegString) {
4572   auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
4573   if (!TheReg)
4574      return -1;
4575   return TheReg->Encoding;
4576 }
4577 
4578 // The flags here are common to those allowed for apsr in the A class cores and
4579 // those allowed for the special registers in the M class cores. Returns a
4580 // value representing which flags were present, -1 if invalid.
4581 static inline int getMClassFlagsMask(StringRef Flags) {
4582   return StringSwitch<int>(Flags)
4583           .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
4584                          // correct when flags are not permitted
4585           .Case("g", 0x1)
4586           .Case("nzcvq", 0x2)
4587           .Case("nzcvqg", 0x3)
4588           .Default(-1);
4589 }
4590 
4591 // Maps MClass special registers string to its value for use in the
4592 // t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
4593 // Returns -1 to signify that the string was invalid.
4594 static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
4595   auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
4596   const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
4597   if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
4598     return -1;
4599   return (int)(TheReg->Encoding & 0xFFF); // SYSm value
4600 }
4601 
4602 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
4603   // The mask operand contains the special register (R Bit) in bit 4, whether
4604   // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
4605   // bits 3-0 contains the fields to be accessed in the special register, set by
4606   // the flags provided with the register.
4607   int Mask = 0;
4608   if (Reg == "apsr") {
4609     // The flags permitted for apsr are the same flags that are allowed in
4610     // M class registers. We get the flag value and then shift the flags into
4611     // the correct place to combine with the mask.
4612     Mask = getMClassFlagsMask(Flags);
4613     if (Mask == -1)
4614       return -1;
4615     return Mask << 2;
4616   }
4617 
4618   if (Reg != "cpsr" && Reg != "spsr") {
4619     return -1;
4620   }
4621 
4622   // This is the same as if the flags were "fc"
4623   if (Flags.empty() || Flags == "all")
4624     return Mask | 0x9;
4625 
4626   // Inspect the supplied flags string and set the bits in the mask for
4627   // the relevant and valid flags allowed for cpsr and spsr.
4628   for (char Flag : Flags) {
4629     int FlagVal;
4630     switch (Flag) {
4631       case 'c':
4632         FlagVal = 0x1;
4633         break;
4634       case 'x':
4635         FlagVal = 0x2;
4636         break;
4637       case 's':
4638         FlagVal = 0x4;
4639         break;
4640       case 'f':
4641         FlagVal = 0x8;
4642         break;
4643       default:
4644         FlagVal = 0;
4645     }
4646 
4647     // This avoids allowing strings where the same flag bit appears twice.
4648     if (!FlagVal || (Mask & FlagVal))
4649       return -1;
4650     Mask |= FlagVal;
4651   }
4652 
4653   // If the register is spsr then we need to set the R bit.
4654   if (Reg == "spsr")
4655     Mask |= 0x10;
4656 
4657   return Mask;
4658 }
4659 
// Lower the read_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to construct as operands for the node.
// Returns true if the node was replaced, false to fall back to default
// selection.
bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
  // Operand 0 is the chain; operand 1 is a metadata node wrapping the
  // register-name string.
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then need to lower to MRC node (32 bit) or
    // MRRC node(64 bit), we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    SmallVector<EVT, 3> ResTypes;
    if (Ops.size() == 5){
      Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
      ResTypes.append({ MVT::i32, MVT::Other });
    } else {
      assert(Ops.size() == 3 &&
              "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
      ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
    }

    // Append the standard predicate operands (AL condition code, no
    // predicate register) and the incoming chain.
    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
    return true;
  }

  // Named registers are matched case-insensitively from here on.
  std::string SpecialReg = RegString->getString().lower();

  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
                                  DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are read by creating SelectionDAG nodes with opcodes
  // corresponding to the register that is being read from. So we switch on the
  // string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                    .Case("fpscr", ARM::VMRS)
                    .Case("fpexc", ARM::VMRS_FPEXC)
                    .Case("fpsid", ARM::VMRS_FPSID)
                    .Case("mvfr0", ARM::VMRS_MVFR0)
                    .Case("mvfr1", ARM::VMRS_MVFR1)
                    .Case("mvfr2", ARM::VMRS_MVFR2)
                    .Case("fpinst", ARM::VMRS_FPINST)
                    .Case("fpinst2", ARM::VMRS_FPINST2)
                    .Default(0);

  // If an opcode was found then we can lower the read to a VFP instruction.
  if (Opcode) {
    if (!Subtarget->hasVFP2Base())
      return false;
    // mvfr2 additionally requires an FP-ARMv8 base.
    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
      return false;

    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N,
                CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // If the target is M Class then need to validate that the register string
  // is an acceptable value, so check that a mask can be constructed from the
  // string.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
                      N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // Here we know the target is not M Class so we need to check if it is one
  // of the remaining possible values which are apsr, cpsr or spsr.
  if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
                                          DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  if (SpecialReg == "spsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
                                  MVT::i32, MVT::Other, Ops));
    return true;
  }

  // Unrecognized register name: leave the node for default handling.
  return false;
}
4774 
// Lower the write_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to use in the nodes.
// Returns true if the node was replaced, false to fall back to default
// selection.
bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
  // Operand 0 is the chain, operand 1 the register-name metadata, and
  // operand 2 (plus operand 3 for 64-bit writes) the value(s) to write.
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then need to lower to MCR node (32 bit) or
    // MCRR node(64 bit), we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
      Ops.insert(Ops.begin()+2, N->getOperand(2));
    } else {
      assert(Ops.size() == 3 &&
              "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
      SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
      Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
    }

    // Append the standard predicate operands (AL condition code, no
    // predicate register) and the incoming chain.
    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));

    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  // Named registers are matched case-insensitively from here on.
  std::string SpecialReg = RegString->getString().lower();
  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
                                  DL, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are written to by creating SelectionDAG nodes with
  // opcodes corresponding to the register that is being written. So we switch
  // on the string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                    .Case("fpscr", ARM::VMSR)
                    .Case("fpexc", ARM::VMSR_FPEXC)
                    .Case("fpsid", ARM::VMSR_FPSID)
                    .Case("fpinst", ARM::VMSR_FPINST)
                    .Case("fpinst2", ARM::VMSR_FPINST2)
                    .Default(0);

  if (Opcode) {
    // The VFP system registers require at least a VFP2 base.
    if (!Subtarget->hasVFP2Base())
      return false;
    Ops = { N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  // Split a name of the form <reg>_<flags> (e.g. "cpsr_fc") into the base
  // register name and the flag suffix for the A/R class handling below.
  std::pair<StringRef, StringRef> Fields;
  Fields = StringRef(SpecialReg).rsplit('_');
  std::string Reg = Fields.first.str();
  StringRef Flags = Fields.second;

  // If the target was M Class then need to validate the special register value
  // and retrieve the mask for use in the instruction node.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      N->getOperand(2), getAL(CurDAG, DL),
                      CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
    return true;
  }

  // We then check to see if a valid mask can be constructed for one of the
  // register string values permitted for the A and R class cores. These values
  // are apsr, spsr and cpsr; these are also valid on older cores.
  int Mask = getARClassRegisterMask(Reg, Flags);
  if (Mask != -1) {
    Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
                                          DL, MVT::Other, Ops));
    return true;
  }

  // Unrecognized register name: leave the node for default handling.
  return false;
}
4878 
// Rewrite inline-asm operands so that 64-bit values constrained to two GPRs
// are carried in a single GPRPair virtual register instead. Returns true if
// the INLINEASM node was rebuilt, false if nothing needed changing.
bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
  std::vector<SDValue> AsmNodeOperands;
  unsigned Flag, Kind;
  bool Changed = false;
  unsigned NumOps = N->getNumOperands();

  // Normally, i64 data is bound to two arbitrary GPRs for "%r" constraint.
  // However, some instructions (e.g. ldrexd/strexd in ARM mode) require
  // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
  // respectively. Since there is no constraint to explicitly specify a
  // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
  // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
  // them into a GPRPair.

  SDLoc dl(N);
  SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
                                   : SDValue(nullptr,0);

  // Tracks, per register operand, whether it was rewritten to a GPRPair;
  // consulted below to resolve tied-operand constraints.
  SmallVector<bool, 8> OpChanged;
  // Glue node will be appended late.
  for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
    SDValue op = N->getOperand(i);
    AsmNodeOperands.push_back(op);

    // Skip the leading non-operand entries (chain, asm string, ...).
    if (i < InlineAsm::Op_FirstOperand)
      continue;

    // Only flag-word constants are inspected; pass everything else through.
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
      Flag = C->getZExtValue();
      Kind = InlineAsm::getKind(Flag);
    }
    else
      continue;

    // Immediate operands to inline asm in the SelectionDAG are modeled with
    // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
    // the second is a constant with the value of the immediate. If we get here
    // and we have a Kind_Imm, skip the next operand, and continue.
    if (Kind == InlineAsm::Kind_Imm) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
    if (NumRegs)
      OpChanged.push_back(false);

    unsigned DefIdx = 0;
    bool IsTiedToChangedOp = false;
    // If it's a use that is tied with a previous def, it has no
    // reg class constraint.
    if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
      IsTiedToChangedOp = OpChanged[DefIdx];

    // Memory operands to inline asm in the SelectionDAG are modeled with two
    // operands: a constant of value InlineAsm::Kind_Mem followed by the input
    // operand. If we get here and we have a Kind_Mem, skip the next operand (so
    // it doesn't get misinterpreted), and continue. We do this here because
    // it's important to update the OpChanged array correctly before moving on.
    if (Kind == InlineAsm::Kind_Mem) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
        && Kind != InlineAsm::Kind_RegDefEarlyClobber)
      continue;

    // Only rewrite two-register GPR operands, or operands tied to one that
    // was already rewritten.
    unsigned RC;
    bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
    if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
        || NumRegs != 2)
      continue;

    assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
    SDValue V0 = N->getOperand(i+1);
    SDValue V1 = N->getOperand(i+2);
    unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
    unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
    SDValue PairedReg;
    MachineRegisterInfo &MRI = MF->getRegInfo();

    if (Kind == InlineAsm::Kind_RegDef ||
        Kind == InlineAsm::Kind_RegDefEarlyClobber) {
      // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
      // the original GPRs.

      Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      SDValue Chain = SDValue(N,0);

      SDNode *GU = N->getGluedUser();
      SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
                                               Chain.getValue(1));

      // Extract values from a GPRPair reg and copy to the original GPR reg.
      SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
                                                    RegCopy);
      SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
                                                    RegCopy);
      SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
                                        RegCopy.getValue(1));
      SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));

      // Update the original glue user so it consumes the new copy chain.
      std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
      Ops.push_back(T1.getValue(1));
      CurDAG->UpdateNodeOperands(GU, Ops);
    }
    else {
      // For Kind  == InlineAsm::Kind_RegUse, we first copy two GPRs into a
      // GPRPair and then pass the GPRPair to the inline asm.
      SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];

      // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
      SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
                                          Chain.getValue(1));
      SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
                                          T0.getValue(1));
      SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);

      // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
      // i32 VRs of inline asm with it.
      Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));

      AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
      Glue = Chain.getValue(1);
    }

    Changed = true;

    if(PairedReg.getNode()) {
      OpChanged[OpChanged.size() -1 ] = true;
      // Build a fresh flag word describing a single-register operand.
      Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
      if (IsTiedToChangedOp)
        Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
      else
        Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
      // Replace the current flag.
      AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
          Flag, dl, MVT::i32);
      // Add the new register node and skip the original two GPRs.
      AsmNodeOperands.push_back(PairedReg);
      // Skip the next two GPRs.
      i += 2;
    }
  }

  if (Glue.getNode())
    AsmNodeOperands.push_back(Glue);
  if (!Changed)
    return false;

  // Rebuild the INLINEASM node with the rewritten operand list.
  SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
      CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
  New->setNodeId(-1);
  ReplaceNode(N, New.getNode());
  return true;
}
5042 
5043 
5044 bool ARMDAGToDAGISel::
5045 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
5046                              std::vector<SDValue> &OutOps) {
5047   switch(ConstraintID) {
5048   default:
5049     llvm_unreachable("Unexpected asm memory constraint");
5050   case InlineAsm::Constraint_m:
5051   case InlineAsm::Constraint_o:
5052   case InlineAsm::Constraint_Q:
5053   case InlineAsm::Constraint_Um:
5054   case InlineAsm::Constraint_Un:
5055   case InlineAsm::Constraint_Uq:
5056   case InlineAsm::Constraint_Us:
5057   case InlineAsm::Constraint_Ut:
5058   case InlineAsm::Constraint_Uv:
5059   case InlineAsm::Constraint_Uy:
5060     // Require the address to be in a register.  That is safe for all ARM
5061     // variants and it is hard to do anything much smarter without knowing
5062     // how the operand is used.
5063     OutOps.push_back(Op);
5064     return false;
5065   }
5066   return true;
5067 }
5068 
5069 /// createARMISelDag - This pass converts a legalized DAG into a
5070 /// ARM-specific DAG, ready for instruction scheduling.
5071 ///
5072 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
5073                                      CodeGenOpt::Level OptLevel) {
5074   return new ARMDAGToDAGISel(TM, OptLevel);
5075 }
5076