1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines an instruction selector for the ARM target.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "ARM.h"
14 #include "ARMBaseInstrInfo.h"
15 #include "ARMTargetMachine.h"
16 #include "MCTargetDesc/ARMAddressingModes.h"
17 #include "Utils/ARMBaseInfo.h"
18 #include "llvm/ADT/APSInt.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/CodeGen/SelectionDAGISel.h"
26 #include "llvm/CodeGen/TargetLowering.h"
27 #include "llvm/IR/CallingConv.h"
28 #include "llvm/IR/Constants.h"
29 #include "llvm/IR/DerivedTypes.h"
30 #include "llvm/IR/Function.h"
31 #include "llvm/IR/Intrinsics.h"
32 #include "llvm/IR/IntrinsicsARM.h"
33 #include "llvm/IR/LLVMContext.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/ErrorHandling.h"
37 #include "llvm/Target/TargetOptions.h"
38 #include <optional>
39
40 using namespace llvm;
41
42 #define DEBUG_TYPE "arm-isel"
43 #define PASS_NAME "ARM Instruction Selection"
44
45 static cl::opt<bool>
46 DisableShifterOp("disable-shifter-op", cl::Hidden,
47 cl::desc("Disable isel of shifter-op"),
48 cl::init(false));
49
50 //===--------------------------------------------------------------------===//
51 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
52 /// instructions for SelectionDAG operations.
53 ///
54 namespace {
55
56 class ARMDAGToDAGISel : public SelectionDAGISel {
57 /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
58 /// make the right decision when generating code for different targets.
59 const ARMSubtarget *Subtarget;
60
61 public:
62 static char ID;
63
64 ARMDAGToDAGISel() = delete;
65
ARMDAGToDAGISel(ARMBaseTargetMachine & tm,CodeGenOptLevel OptLevel)66 explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel)
67 : SelectionDAGISel(ID, tm, OptLevel) {}
68
runOnMachineFunction(MachineFunction & MF)69 bool runOnMachineFunction(MachineFunction &MF) override {
70 // Reset the subtarget each time through.
71 Subtarget = &MF.getSubtarget<ARMSubtarget>();
72 SelectionDAGISel::runOnMachineFunction(MF);
73 return true;
74 }
75
76 void PreprocessISelDAG() override;
77
78 /// getI32Imm - Return a target constant of type i32 with the specified
79 /// value.
getI32Imm(unsigned Imm,const SDLoc & dl)80 inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
81 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
82 }
83
84 void Select(SDNode *N) override;
85
86 /// Return true as some complex patterns, like those that call
87 /// canExtractShiftFromMul can modify the DAG inplace.
ComplexPatternFuncMutatesDAG() const88 bool ComplexPatternFuncMutatesDAG() const override { return true; }
89
90 bool hasNoVMLxHazardUse(SDNode *N) const;
91 bool isShifterOpProfitable(const SDValue &Shift,
92 ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
93 bool SelectRegShifterOperand(SDValue N, SDValue &A,
94 SDValue &B, SDValue &C,
95 bool CheckProfitability = true);
96 bool SelectImmShifterOperand(SDValue N, SDValue &A,
97 SDValue &B, bool CheckProfitability = true);
SelectShiftRegShifterOperand(SDValue N,SDValue & A,SDValue & B,SDValue & C)98 bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, SDValue &B,
99 SDValue &C) {
100 // Don't apply the profitability check
101 return SelectRegShifterOperand(N, A, B, C, false);
102 }
SelectShiftImmShifterOperand(SDValue N,SDValue & A,SDValue & B)103 bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, SDValue &B) {
104 // Don't apply the profitability check
105 return SelectImmShifterOperand(N, A, B, false);
106 }
SelectShiftImmShifterOperandOneUse(SDValue N,SDValue & A,SDValue & B)107 bool SelectShiftImmShifterOperandOneUse(SDValue N, SDValue &A, SDValue &B) {
108 if (!N.hasOneUse())
109 return false;
110 return SelectImmShifterOperand(N, A, B, false);
111 }
112
113 bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);
114
115 bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
116 bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
117
SelectCMOVPred(SDValue N,SDValue & Pred,SDValue & Reg)118 bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
119 const ConstantSDNode *CN = cast<ConstantSDNode>(N);
120 Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
121 Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
122 return true;
123 }
124
125 bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
126 SDValue &Offset, SDValue &Opc);
127 bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
128 SDValue &Offset, SDValue &Opc);
129 bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
130 SDValue &Offset, SDValue &Opc);
131 bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
132 bool SelectAddrMode3(SDValue N, SDValue &Base,
133 SDValue &Offset, SDValue &Opc);
134 bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
135 SDValue &Offset, SDValue &Opc);
136 bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
137 bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
138 bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
139 bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
140 bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
141
142 bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
143
144 // Thumb Addressing Modes:
145 bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
146 bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
147 bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
148 SDValue &OffImm);
149 bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
150 SDValue &OffImm);
151 bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
152 SDValue &OffImm);
153 bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
154 SDValue &OffImm);
155 bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
156 template <unsigned Shift>
157 bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
158
159 // Thumb 2 Addressing Modes:
160 bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
161 template <unsigned Shift>
162 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
163 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
164 SDValue &OffImm);
165 bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
166 SDValue &OffImm);
167 template <unsigned Shift>
168 bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
169 bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
170 unsigned Shift);
171 template <unsigned Shift>
172 bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
173 bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
174 SDValue &OffReg, SDValue &ShImm);
175 bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
176
177 template<int Min, int Max>
178 bool SelectImmediateInRange(SDValue N, SDValue &OffImm);
179
is_so_imm(unsigned Imm) const180 inline bool is_so_imm(unsigned Imm) const {
181 return ARM_AM::getSOImmVal(Imm) != -1;
182 }
183
is_so_imm_not(unsigned Imm) const184 inline bool is_so_imm_not(unsigned Imm) const {
185 return ARM_AM::getSOImmVal(~Imm) != -1;
186 }
187
is_t2_so_imm(unsigned Imm) const188 inline bool is_t2_so_imm(unsigned Imm) const {
189 return ARM_AM::getT2SOImmVal(Imm) != -1;
190 }
191
is_t2_so_imm_not(unsigned Imm) const192 inline bool is_t2_so_imm_not(unsigned Imm) const {
193 return ARM_AM::getT2SOImmVal(~Imm) != -1;
194 }
195
196 // Include the pieces autogenerated from the target description.
197 #include "ARMGenDAGISel.inc"
198
199 private:
200 void transferMemOperands(SDNode *Src, SDNode *Dst);
201
202 /// Indexed (pre/post inc/dec) load matching code for ARM.
203 bool tryARMIndexedLoad(SDNode *N);
204 bool tryT1IndexedLoad(SDNode *N);
205 bool tryT2IndexedLoad(SDNode *N);
206 bool tryMVEIndexedLoad(SDNode *N);
207 bool tryFMULFixed(SDNode *N, SDLoc dl);
208 bool tryFP_TO_INT(SDNode *N, SDLoc dl);
209 bool transformFixedFloatingPointConversion(SDNode *N, SDNode *FMul,
210 bool IsUnsigned,
211 bool FixedToFloat);
212
213 /// SelectVLD - Select NEON load intrinsics. NumVecs should be
214 /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
215 /// loads of D registers and even subregs and odd subregs of Q registers.
216 /// For NumVecs <= 2, QOpcodes1 is not used.
217 void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
218 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
219 const uint16_t *QOpcodes1);
220
221 /// SelectVST - Select NEON store intrinsics. NumVecs should
222 /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
223 /// stores of D registers and even subregs and odd subregs of Q registers.
224 /// For NumVecs <= 2, QOpcodes1 is not used.
225 void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
226 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
227 const uint16_t *QOpcodes1);
228
229 /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
230 /// be 2, 3 or 4. The opcode arrays specify the instructions used for
231 /// load/store of D registers and Q registers.
232 void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
233 unsigned NumVecs, const uint16_t *DOpcodes,
234 const uint16_t *QOpcodes);
235
236 /// Helper functions for setting up clusters of MVE predication operands.
237 template <typename SDValueVector>
238 void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
239 SDValue PredicateMask);
240 template <typename SDValueVector>
241 void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
242 SDValue PredicateMask, SDValue Inactive);
243
244 template <typename SDValueVector>
245 void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc);
246 template <typename SDValueVector>
247 void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy);
248
249 /// SelectMVE_WB - Select MVE writeback load/store intrinsics.
250 void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated);
251
252 /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics.
253 void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate,
254 bool HasSaturationOperand);
255
256 /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics.
257 void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
258 uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);
259
260 /// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between
261 /// vector lanes.
262 void SelectMVE_VSHLC(SDNode *N, bool Predicated);
263
264 /// Select long MVE vector reductions with two vector operands
265 /// Stride is the number of vector element widths the instruction can operate
266 /// on:
267 /// 2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]
268 /// 1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32]
269 /// Stride is used when addressing the OpcodesS array which contains multiple
270 /// opcodes for each element width.
271 /// TySize is the index into the list of element types listed above
272 void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
273 const uint16_t *OpcodesS, const uint16_t *OpcodesU,
274 size_t Stride, size_t TySize);
275
276 /// Select a 64-bit MVE vector reduction with two vector operands
277 /// arm_mve_vmlldava_[predicated]
278 void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
279 const uint16_t *OpcodesU);
280 /// Select a 72-bit MVE vector rounding reduction with two vector operands
281 /// int_arm_mve_vrmlldavha[_predicated]
282 void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
283 const uint16_t *OpcodesU);
284
285 /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs
286 /// should be 2 or 4. The opcode array specifies the instructions
287 /// used for 8, 16 and 32-bit lane sizes respectively, and each
288 /// pointer points to a set of NumVecs sub-opcodes used for the
289 /// different stages (e.g. VLD20 versus VLD21) of each load family.
290 void SelectMVE_VLD(SDNode *N, unsigned NumVecs,
291 const uint16_t *const *Opcodes, bool HasWriteback);
292
293 /// SelectMVE_VxDUP - Select MVE incrementing-dup instructions. Opcodes is an
294 /// array of 3 elements for the 8, 16 and 32-bit lane sizes.
295 void SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
296 bool Wrapping, bool Predicated);
297
298 /// Select SelectCDE_CXxD - Select CDE dual-GPR instruction (one of CX1D,
299 /// CX1DA, CX2D, CX2DA, CX3, CX3DA).
300 /// \arg \c NumExtraOps number of extra operands besides the coprocossor,
301 /// the accumulator and the immediate operand, i.e. 0
302 /// for CX1*, 1 for CX2*, 2 for CX3*
303 /// \arg \c HasAccum whether the instruction has an accumulator operand
304 void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps,
305 bool HasAccum);
306
307 /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
308 /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
309 /// for loading D registers.
310 void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
311 unsigned NumVecs, const uint16_t *DOpcodes,
312 const uint16_t *QOpcodes0 = nullptr,
313 const uint16_t *QOpcodes1 = nullptr);
314
315 /// Try to select SBFX/UBFX instructions for ARM.
316 bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
317
318 bool tryInsertVectorElt(SDNode *N);
319
320 // Select special operations if node forms integer ABS pattern
321 bool tryABSOp(SDNode *N);
322
323 bool tryReadRegister(SDNode *N);
324 bool tryWriteRegister(SDNode *N);
325
326 bool tryInlineAsm(SDNode *N);
327
328 void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);
329
330 void SelectCMP_SWAP(SDNode *N);
331
332 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
333 /// inline asm expressions.
334 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
335 InlineAsm::ConstraintCode ConstraintID,
336 std::vector<SDValue> &OutOps) override;
337
338 // Form pairs of consecutive R, S, D, or Q registers.
339 SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
340 SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
341 SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
342 SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
343
344 // Form sequences of 4 consecutive S, D, or Q registers.
345 SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
346 SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
347 SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
348
349 // Get the alignment operand for a NEON VLD or VST instruction.
350 SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
351 bool is64BitVector);
352
353 /// Checks if N is a multiplication by a constant where we can extract out a
354 /// power of two from the constant so that it can be used in a shift, but only
355 /// if it simplifies the materialization of the constant. Returns true if it
356 /// is, and assigns to PowerOfTwo the power of two that should be extracted
357 /// out and to NewMulConst the new constant to be multiplied by.
358 bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
359 unsigned &PowerOfTwo, SDValue &NewMulConst) const;
360
361 /// Replace N with M in CurDAG, in a way that also ensures that M gets
362 /// selected when N would have been selected.
363 void replaceDAGValue(const SDValue &N, SDValue M);
364 };
365 }
366
367 char ARMDAGToDAGISel::ID = 0;
368
INITIALIZE_PASS(ARMDAGToDAGISel,DEBUG_TYPE,PASS_NAME,false,false)369 INITIALIZE_PASS(ARMDAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)
370
371 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
372 /// operand. If so Imm will receive the 32-bit value.
373 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
374 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
375 Imm = N->getAsZExtVal();
376 return true;
377 }
378 return false;
379 }
380
// isInt32Immediate - This method tests to see if the value is a 32-bit
// constant operand. If so Imm will receive the 32-bit value.
isInt32Immediate(SDValue N,unsigned & Imm)383 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
384 return isInt32Immediate(N.getNode(), Imm);
385 }
386
// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so Imm will receive the 32-bit value.
isOpcWithIntImmediate(SDNode * N,unsigned Opc,unsigned & Imm)390 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
391 return N->getOpcode() == Opc &&
392 isInt32Immediate(N->getOperand(1).getNode(), Imm);
393 }
394
/// Check whether a particular node is a constant value representable as
/// (N * Scale) where N is in [\p RangeMin, \p RangeMax).
397 ///
398 /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
isScaledConstantInRange(SDValue Node,int Scale,int RangeMin,int RangeMax,int & ScaledConstant)399 static bool isScaledConstantInRange(SDValue Node, int Scale,
400 int RangeMin, int RangeMax,
401 int &ScaledConstant) {
402 assert(Scale > 0 && "Invalid scale!");
403
404 // Check that this is a constant.
405 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
406 if (!C)
407 return false;
408
409 ScaledConstant = (int) C->getZExtValue();
410 if ((ScaledConstant % Scale) != 0)
411 return false;
412
413 ScaledConstant /= Scale;
414 return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
415 }
416
PreprocessISelDAG()417 void ARMDAGToDAGISel::PreprocessISelDAG() {
418 if (!Subtarget->hasV6T2Ops())
419 return;
420
421 bool isThumb2 = Subtarget->isThumb();
422 // We use make_early_inc_range to avoid invalidation issues.
423 for (SDNode &N : llvm::make_early_inc_range(CurDAG->allnodes())) {
424 if (N.getOpcode() != ISD::ADD)
425 continue;
426
427 // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
428 // leading zeros, followed by consecutive set bits, followed by 1 or 2
429 // trailing zeros, e.g. 1020.
430 // Transform the expression to
431 // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
432 // of trailing zeros of c2. The left shift would be folded as an shifter
433 // operand of 'add' and the 'and' and 'srl' would become a bits extraction
434 // node (UBFX).
435
436 SDValue N0 = N.getOperand(0);
437 SDValue N1 = N.getOperand(1);
438 unsigned And_imm = 0;
439 if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
440 if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
441 std::swap(N0, N1);
442 }
443 if (!And_imm)
444 continue;
445
446 // Check if the AND mask is an immediate of the form: 000.....1111111100
447 unsigned TZ = llvm::countr_zero(And_imm);
448 if (TZ != 1 && TZ != 2)
449 // Be conservative here. Shifter operands aren't always free. e.g. On
450 // Swift, left shifter operand of 1 / 2 for free but others are not.
451 // e.g.
452 // ubfx r3, r1, #16, #8
453 // ldr.w r3, [r0, r3, lsl #2]
454 // vs.
455 // mov.w r9, #1020
456 // and.w r2, r9, r1, lsr #14
457 // ldr r2, [r0, r2]
458 continue;
459 And_imm >>= TZ;
460 if (And_imm & (And_imm + 1))
461 continue;
462
463 // Look for (and (srl X, c1), c2).
464 SDValue Srl = N1.getOperand(0);
465 unsigned Srl_imm = 0;
466 if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
467 (Srl_imm <= 2))
468 continue;
469
470 // Make sure first operand is not a shifter operand which would prevent
471 // folding of the left shift.
472 SDValue CPTmp0;
473 SDValue CPTmp1;
474 SDValue CPTmp2;
475 if (isThumb2) {
476 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
477 continue;
478 } else {
479 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
480 SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
481 continue;
482 }
483
484 // Now make the transformation.
485 Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
486 Srl.getOperand(0),
487 CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
488 MVT::i32));
489 N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
490 Srl,
491 CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
492 N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
493 N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
494 CurDAG->UpdateNodeOperands(&N, N0, N1);
495 }
496 }
497
/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
hasNoVMLxHazardUse(SDNode * N) const501 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
502 if (OptLevel == CodeGenOptLevel::None)
503 return true;
504
505 if (!Subtarget->hasVMLxHazards())
506 return true;
507
508 if (!N->hasOneUse())
509 return false;
510
511 SDNode *Use = *N->use_begin();
512 if (Use->getOpcode() == ISD::CopyToReg)
513 return true;
514 if (Use->isMachineOpcode()) {
515 const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
516 CurDAG->getSubtarget().getInstrInfo());
517
518 const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
519 if (MCID.mayStore())
520 return true;
521 unsigned Opcode = MCID.getOpcode();
522 if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
523 return true;
524 // vmlx feeding into another vmlx. We actually want to unfold
525 // the use later in the MLxExpansion pass. e.g.
526 // vmla
527 // vmla (stall 8 cycles)
528 //
529 // vmul (5 cycles)
530 // vadd (5 cycles)
531 // vmla
532 // This adds up to about 18 - 19 cycles.
533 //
534 // vmla
535 // vmul (stall 4 cycles)
536 // vadd adds up to about 14 cycles.
537 return TII->isFpMLxInstruction(Opcode);
538 }
539
540 return false;
541 }
542
isShifterOpProfitable(const SDValue & Shift,ARM_AM::ShiftOpc ShOpcVal,unsigned ShAmt)543 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
544 ARM_AM::ShiftOpc ShOpcVal,
545 unsigned ShAmt) {
546 if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
547 return true;
548 if (Shift.hasOneUse())
549 return true;
550 // R << 2 is free.
551 return ShOpcVal == ARM_AM::lsl &&
552 (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
553 }
554
canExtractShiftFromMul(const SDValue & N,unsigned MaxShift,unsigned & PowerOfTwo,SDValue & NewMulConst) const555 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
556 unsigned MaxShift,
557 unsigned &PowerOfTwo,
558 SDValue &NewMulConst) const {
559 assert(N.getOpcode() == ISD::MUL);
560 assert(MaxShift > 0);
561
562 // If the multiply is used in more than one place then changing the constant
563 // will make other uses incorrect, so don't.
564 if (!N.hasOneUse()) return false;
565 // Check if the multiply is by a constant
566 ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
567 if (!MulConst) return false;
568 // If the constant is used in more than one place then modifying it will mean
569 // we need to materialize two constants instead of one, which is a bad idea.
570 if (!MulConst->hasOneUse()) return false;
571 unsigned MulConstVal = MulConst->getZExtValue();
572 if (MulConstVal == 0) return false;
573
574 // Find the largest power of 2 that MulConstVal is a multiple of
575 PowerOfTwo = MaxShift;
576 while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
577 --PowerOfTwo;
578 if (PowerOfTwo == 0) return false;
579 }
580
581 // Only optimise if the new cost is better
582 unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
583 NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
584 unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget);
585 unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget);
586 return NewCost < OldCost;
587 }
588
replaceDAGValue(const SDValue & N,SDValue M)589 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
590 CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
591 ReplaceUses(N, M);
592 }
593
SelectImmShifterOperand(SDValue N,SDValue & BaseReg,SDValue & Opc,bool CheckProfitability)594 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
595 SDValue &BaseReg,
596 SDValue &Opc,
597 bool CheckProfitability) {
598 if (DisableShifterOp)
599 return false;
600
601 // If N is a multiply-by-constant and it's profitable to extract a shift and
602 // use it in a shifted operand do so.
603 if (N.getOpcode() == ISD::MUL) {
604 unsigned PowerOfTwo = 0;
605 SDValue NewMulConst;
606 if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
607 HandleSDNode Handle(N);
608 SDLoc Loc(N);
609 replaceDAGValue(N.getOperand(1), NewMulConst);
610 BaseReg = Handle.getValue();
611 Opc = CurDAG->getTargetConstant(
612 ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
613 return true;
614 }
615 }
616
617 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
618
619 // Don't match base register only case. That is matched to a separate
620 // lower complexity pattern with explicit register operand.
621 if (ShOpcVal == ARM_AM::no_shift) return false;
622
623 BaseReg = N.getOperand(0);
624 unsigned ShImmVal = 0;
625 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
626 if (!RHS) return false;
627 ShImmVal = RHS->getZExtValue() & 31;
628 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
629 SDLoc(N), MVT::i32);
630 return true;
631 }
632
SelectRegShifterOperand(SDValue N,SDValue & BaseReg,SDValue & ShReg,SDValue & Opc,bool CheckProfitability)633 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
634 SDValue &BaseReg,
635 SDValue &ShReg,
636 SDValue &Opc,
637 bool CheckProfitability) {
638 if (DisableShifterOp)
639 return false;
640
641 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
642
643 // Don't match base register only case. That is matched to a separate
644 // lower complexity pattern with explicit register operand.
645 if (ShOpcVal == ARM_AM::no_shift) return false;
646
647 BaseReg = N.getOperand(0);
648 unsigned ShImmVal = 0;
649 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
650 if (RHS) return false;
651
652 ShReg = N.getOperand(1);
653 if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
654 return false;
655 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
656 SDLoc(N), MVT::i32);
657 return true;
658 }
659
660 // Determine whether an ISD::OR's operands are suitable to turn the operation
661 // into an addition, which often has more compact encodings.
SelectAddLikeOr(SDNode * Parent,SDValue N,SDValue & Out)662 bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
663 assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
664 Out = N;
665 return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
666 }
667
668
SelectAddrModeImm12(SDValue N,SDValue & Base,SDValue & OffImm)669 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
670 SDValue &Base,
671 SDValue &OffImm) {
672 // Match simple R + imm12 operands.
673
674 // Base only.
675 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
676 !CurDAG->isBaseWithConstantOffset(N)) {
677 if (N.getOpcode() == ISD::FrameIndex) {
678 // Match frame index.
679 int FI = cast<FrameIndexSDNode>(N)->getIndex();
680 Base = CurDAG->getTargetFrameIndex(
681 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
682 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
683 return true;
684 }
685
686 if (N.getOpcode() == ARMISD::Wrapper &&
687 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
688 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
689 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
690 Base = N.getOperand(0);
691 } else
692 Base = N;
693 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
694 return true;
695 }
696
697 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
698 int RHSC = (int)RHS->getSExtValue();
699 if (N.getOpcode() == ISD::SUB)
700 RHSC = -RHSC;
701
702 if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
703 Base = N.getOperand(0);
704 if (Base.getOpcode() == ISD::FrameIndex) {
705 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
706 Base = CurDAG->getTargetFrameIndex(
707 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
708 }
709 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
710 return true;
711 }
712 }
713
714 // Base only.
715 Base = N;
716 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
717 return true;
718 }
719
720
721
SelectLdStSOReg(SDValue N,SDValue & Base,SDValue & Offset,SDValue & Opc)722 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
723 SDValue &Opc) {
724 if (N.getOpcode() == ISD::MUL &&
725 ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
726 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
727 // X * [3,5,9] -> X + X * [2,4,8] etc.
728 int RHSC = (int)RHS->getZExtValue();
729 if (RHSC & 1) {
730 RHSC = RHSC & ~1;
731 ARM_AM::AddrOpc AddSub = ARM_AM::add;
732 if (RHSC < 0) {
733 AddSub = ARM_AM::sub;
734 RHSC = - RHSC;
735 }
736 if (isPowerOf2_32(RHSC)) {
737 unsigned ShAmt = Log2_32(RHSC);
738 Base = Offset = N.getOperand(0);
739 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
740 ARM_AM::lsl),
741 SDLoc(N), MVT::i32);
742 return true;
743 }
744 }
745 }
746 }
747
748 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
749 // ISD::OR that is equivalent to an ISD::ADD.
750 !CurDAG->isBaseWithConstantOffset(N))
751 return false;
752
753 // Leave simple R +/- imm12 operands for LDRi12
754 if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
755 int RHSC;
756 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
757 -0x1000+1, 0x1000, RHSC)) // 12 bits.
758 return false;
759 }
760
761 // Otherwise this is R +/- [possibly shifted] R.
762 ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
763 ARM_AM::ShiftOpc ShOpcVal =
764 ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
765 unsigned ShAmt = 0;
766
767 Base = N.getOperand(0);
768 Offset = N.getOperand(1);
769
770 if (ShOpcVal != ARM_AM::no_shift) {
771 // Check to see if the RHS of the shift is a constant, if not, we can't fold
772 // it.
773 if (ConstantSDNode *Sh =
774 dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
775 ShAmt = Sh->getZExtValue();
776 if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
777 Offset = N.getOperand(1).getOperand(0);
778 else {
779 ShAmt = 0;
780 ShOpcVal = ARM_AM::no_shift;
781 }
782 } else {
783 ShOpcVal = ARM_AM::no_shift;
784 }
785 }
786
787 // Try matching (R shl C) + (R).
788 if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
789 !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
790 N.getOperand(0).hasOneUse())) {
791 ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
792 if (ShOpcVal != ARM_AM::no_shift) {
793 // Check to see if the RHS of the shift is a constant, if not, we can't
794 // fold it.
795 if (ConstantSDNode *Sh =
796 dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
797 ShAmt = Sh->getZExtValue();
798 if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
799 Offset = N.getOperand(0).getOperand(0);
800 Base = N.getOperand(1);
801 } else {
802 ShAmt = 0;
803 ShOpcVal = ARM_AM::no_shift;
804 }
805 } else {
806 ShOpcVal = ARM_AM::no_shift;
807 }
808 }
809 }
810
811 // If Offset is a multiply-by-constant and it's profitable to extract a shift
812 // and use it in a shifted operand do so.
813 if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
814 unsigned PowerOfTwo = 0;
815 SDValue NewMulConst;
816 if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
817 HandleSDNode Handle(Offset);
818 replaceDAGValue(Offset.getOperand(1), NewMulConst);
819 Offset = Handle.getValue();
820 ShAmt = PowerOfTwo;
821 ShOpcVal = ARM_AM::lsl;
822 }
823 }
824
825 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
826 SDLoc(N), MVT::i32);
827 return true;
828 }
829
SelectAddrMode2OffsetReg(SDNode * Op,SDValue N,SDValue & Offset,SDValue & Opc)830 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
831 SDValue &Offset, SDValue &Opc) {
832 unsigned Opcode = Op->getOpcode();
833 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
834 ? cast<LoadSDNode>(Op)->getAddressingMode()
835 : cast<StoreSDNode>(Op)->getAddressingMode();
836 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
837 ? ARM_AM::add : ARM_AM::sub;
838 int Val;
839 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
840 return false;
841
842 Offset = N;
843 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
844 unsigned ShAmt = 0;
845 if (ShOpcVal != ARM_AM::no_shift) {
846 // Check to see if the RHS of the shift is a constant, if not, we can't fold
847 // it.
848 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
849 ShAmt = Sh->getZExtValue();
850 if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
851 Offset = N.getOperand(0);
852 else {
853 ShAmt = 0;
854 ShOpcVal = ARM_AM::no_shift;
855 }
856 } else {
857 ShOpcVal = ARM_AM::no_shift;
858 }
859 }
860
861 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
862 SDLoc(N), MVT::i32);
863 return true;
864 }
865
SelectAddrMode2OffsetImmPre(SDNode * Op,SDValue N,SDValue & Offset,SDValue & Opc)866 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
867 SDValue &Offset, SDValue &Opc) {
868 unsigned Opcode = Op->getOpcode();
869 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
870 ? cast<LoadSDNode>(Op)->getAddressingMode()
871 : cast<StoreSDNode>(Op)->getAddressingMode();
872 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
873 ? ARM_AM::add : ARM_AM::sub;
874 int Val;
875 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
876 if (AddSub == ARM_AM::sub) Val *= -1;
877 Offset = CurDAG->getRegister(0, MVT::i32);
878 Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
879 return true;
880 }
881
882 return false;
883 }
884
885
SelectAddrMode2OffsetImm(SDNode * Op,SDValue N,SDValue & Offset,SDValue & Opc)886 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
887 SDValue &Offset, SDValue &Opc) {
888 unsigned Opcode = Op->getOpcode();
889 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
890 ? cast<LoadSDNode>(Op)->getAddressingMode()
891 : cast<StoreSDNode>(Op)->getAddressingMode();
892 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
893 ? ARM_AM::add : ARM_AM::sub;
894 int Val;
895 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
896 Offset = CurDAG->getRegister(0, MVT::i32);
897 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
898 ARM_AM::no_shift),
899 SDLoc(Op), MVT::i32);
900 return true;
901 }
902
903 return false;
904 }
905
SelectAddrOffsetNone(SDValue N,SDValue & Base)906 bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
907 Base = N;
908 return true;
909 }
910
SelectAddrMode3(SDValue N,SDValue & Base,SDValue & Offset,SDValue & Opc)911 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
912 SDValue &Base, SDValue &Offset,
913 SDValue &Opc) {
914 if (N.getOpcode() == ISD::SUB) {
915 // X - C is canonicalize to X + -C, no need to handle it here.
916 Base = N.getOperand(0);
917 Offset = N.getOperand(1);
918 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
919 MVT::i32);
920 return true;
921 }
922
923 if (!CurDAG->isBaseWithConstantOffset(N)) {
924 Base = N;
925 if (N.getOpcode() == ISD::FrameIndex) {
926 int FI = cast<FrameIndexSDNode>(N)->getIndex();
927 Base = CurDAG->getTargetFrameIndex(
928 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
929 }
930 Offset = CurDAG->getRegister(0, MVT::i32);
931 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
932 MVT::i32);
933 return true;
934 }
935
936 // If the RHS is +/- imm8, fold into addr mode.
937 int RHSC;
938 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
939 -256 + 1, 256, RHSC)) { // 8 bits.
940 Base = N.getOperand(0);
941 if (Base.getOpcode() == ISD::FrameIndex) {
942 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
943 Base = CurDAG->getTargetFrameIndex(
944 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
945 }
946 Offset = CurDAG->getRegister(0, MVT::i32);
947
948 ARM_AM::AddrOpc AddSub = ARM_AM::add;
949 if (RHSC < 0) {
950 AddSub = ARM_AM::sub;
951 RHSC = -RHSC;
952 }
953 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
954 MVT::i32);
955 return true;
956 }
957
958 Base = N.getOperand(0);
959 Offset = N.getOperand(1);
960 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
961 MVT::i32);
962 return true;
963 }
964
SelectAddrMode3Offset(SDNode * Op,SDValue N,SDValue & Offset,SDValue & Opc)965 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
966 SDValue &Offset, SDValue &Opc) {
967 unsigned Opcode = Op->getOpcode();
968 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
969 ? cast<LoadSDNode>(Op)->getAddressingMode()
970 : cast<StoreSDNode>(Op)->getAddressingMode();
971 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
972 ? ARM_AM::add : ARM_AM::sub;
973 int Val;
974 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
975 Offset = CurDAG->getRegister(0, MVT::i32);
976 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
977 MVT::i32);
978 return true;
979 }
980
981 Offset = N;
982 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
983 MVT::i32);
984 return true;
985 }
986
IsAddressingMode5(SDValue N,SDValue & Base,SDValue & Offset,bool FP16)987 bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
988 bool FP16) {
989 if (!CurDAG->isBaseWithConstantOffset(N)) {
990 Base = N;
991 if (N.getOpcode() == ISD::FrameIndex) {
992 int FI = cast<FrameIndexSDNode>(N)->getIndex();
993 Base = CurDAG->getTargetFrameIndex(
994 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
995 } else if (N.getOpcode() == ARMISD::Wrapper &&
996 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
997 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
998 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
999 Base = N.getOperand(0);
1000 }
1001 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
1002 SDLoc(N), MVT::i32);
1003 return true;
1004 }
1005
1006 // If the RHS is +/- imm8, fold into addr mode.
1007 int RHSC;
1008 const int Scale = FP16 ? 2 : 4;
1009
1010 if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
1011 Base = N.getOperand(0);
1012 if (Base.getOpcode() == ISD::FrameIndex) {
1013 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1014 Base = CurDAG->getTargetFrameIndex(
1015 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1016 }
1017
1018 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1019 if (RHSC < 0) {
1020 AddSub = ARM_AM::sub;
1021 RHSC = -RHSC;
1022 }
1023
1024 if (FP16)
1025 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
1026 SDLoc(N), MVT::i32);
1027 else
1028 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
1029 SDLoc(N), MVT::i32);
1030
1031 return true;
1032 }
1033
1034 Base = N;
1035
1036 if (FP16)
1037 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
1038 SDLoc(N), MVT::i32);
1039 else
1040 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
1041 SDLoc(N), MVT::i32);
1042
1043 return true;
1044 }
1045
SelectAddrMode5(SDValue N,SDValue & Base,SDValue & Offset)1046 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
1047 SDValue &Base, SDValue &Offset) {
1048 return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
1049 }
1050
SelectAddrMode5FP16(SDValue N,SDValue & Base,SDValue & Offset)1051 bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
1052 SDValue &Base, SDValue &Offset) {
1053 return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
1054 }
1055
SelectAddrMode6(SDNode * Parent,SDValue N,SDValue & Addr,SDValue & Align)1056 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
1057 SDValue &Align) {
1058 Addr = N;
1059
1060 unsigned Alignment = 0;
1061
1062 MemSDNode *MemN = cast<MemSDNode>(Parent);
1063
1064 if (isa<LSBaseSDNode>(MemN) ||
1065 ((MemN->getOpcode() == ARMISD::VST1_UPD ||
1066 MemN->getOpcode() == ARMISD::VLD1_UPD) &&
1067 MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
1068 // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
1069 // The maximum alignment is equal to the memory size being referenced.
1070 llvm::Align MMOAlign = MemN->getAlign();
1071 unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
1072 if (MMOAlign.value() >= MemSize && MemSize > 1)
1073 Alignment = MemSize;
1074 } else {
1075 // All other uses of addrmode6 are for intrinsics. For now just record
1076 // the raw alignment value; it will be refined later based on the legal
1077 // alignment operands for the intrinsic.
1078 Alignment = MemN->getAlign().value();
1079 }
1080
1081 Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
1082 return true;
1083 }
1084
SelectAddrMode6Offset(SDNode * Op,SDValue N,SDValue & Offset)1085 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1086 SDValue &Offset) {
1087 LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1088 ISD::MemIndexedMode AM = LdSt->getAddressingMode();
1089 if (AM != ISD::POST_INC)
1090 return false;
1091 Offset = N;
1092 if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1093 if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1094 Offset = CurDAG->getRegister(0, MVT::i32);
1095 }
1096 return true;
1097 }
1098
SelectAddrModePC(SDValue N,SDValue & Offset,SDValue & Label)1099 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1100 SDValue &Offset, SDValue &Label) {
1101 if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1102 Offset = N.getOperand(0);
1103 SDValue N1 = N.getOperand(1);
1104 Label = CurDAG->getTargetConstant(N1->getAsZExtVal(), SDLoc(N), MVT::i32);
1105 return true;
1106 }
1107
1108 return false;
1109 }
1110
1111
1112 //===----------------------------------------------------------------------===//
1113 // Thumb Addressing Modes
1114 //===----------------------------------------------------------------------===//
1115
shouldUseZeroOffsetLdSt(SDValue N)1116 static bool shouldUseZeroOffsetLdSt(SDValue N) {
1117 // Negative numbers are difficult to materialise in thumb1. If we are
1118 // selecting the add of a negative, instead try to select ri with a zero
1119 // offset, so create the add node directly which will become a sub.
1120 if (N.getOpcode() != ISD::ADD)
1121 return false;
1122
1123 // Look for an imm which is not legal for ld/st, but is legal for sub.
1124 if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
1125 return C->getSExtValue() < 0 && C->getSExtValue() >= -255;
1126
1127 return false;
1128 }
1129
SelectThumbAddrModeRRSext(SDValue N,SDValue & Base,SDValue & Offset)1130 bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
1131 SDValue &Offset) {
1132 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1133 if (!isNullConstant(N))
1134 return false;
1135
1136 Base = Offset = N;
1137 return true;
1138 }
1139
1140 Base = N.getOperand(0);
1141 Offset = N.getOperand(1);
1142 return true;
1143 }
1144
SelectThumbAddrModeRR(SDValue N,SDValue & Base,SDValue & Offset)1145 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
1146 SDValue &Offset) {
1147 if (shouldUseZeroOffsetLdSt(N))
1148 return false; // Select ri instead
1149 return SelectThumbAddrModeRRSext(N, Base, Offset);
1150 }
1151
1152 bool
SelectThumbAddrModeImm5S(SDValue N,unsigned Scale,SDValue & Base,SDValue & OffImm)1153 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
1154 SDValue &Base, SDValue &OffImm) {
1155 if (shouldUseZeroOffsetLdSt(N)) {
1156 Base = N;
1157 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1158 return true;
1159 }
1160
1161 if (!CurDAG->isBaseWithConstantOffset(N)) {
1162 if (N.getOpcode() == ISD::ADD) {
1163 return false; // We want to select register offset instead
1164 } else if (N.getOpcode() == ARMISD::Wrapper &&
1165 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1166 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1167 N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
1168 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1169 Base = N.getOperand(0);
1170 } else {
1171 Base = N;
1172 }
1173
1174 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1175 return true;
1176 }
1177
1178 // If the RHS is + imm5 * scale, fold into addr mode.
1179 int RHSC;
1180 if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
1181 Base = N.getOperand(0);
1182 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1183 return true;
1184 }
1185
1186 // Offset is too large, so use register offset instead.
1187 return false;
1188 }
1189
1190 bool
SelectThumbAddrModeImm5S4(SDValue N,SDValue & Base,SDValue & OffImm)1191 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
1192 SDValue &OffImm) {
1193 return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
1194 }
1195
1196 bool
SelectThumbAddrModeImm5S2(SDValue N,SDValue & Base,SDValue & OffImm)1197 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
1198 SDValue &OffImm) {
1199 return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
1200 }
1201
1202 bool
SelectThumbAddrModeImm5S1(SDValue N,SDValue & Base,SDValue & OffImm)1203 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
1204 SDValue &OffImm) {
1205 return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
1206 }
1207
SelectThumbAddrModeSP(SDValue N,SDValue & Base,SDValue & OffImm)1208 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
1209 SDValue &Base, SDValue &OffImm) {
1210 if (N.getOpcode() == ISD::FrameIndex) {
1211 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1212 // Only multiples of 4 are allowed for the offset, so the frame object
1213 // alignment must be at least 4.
1214 MachineFrameInfo &MFI = MF->getFrameInfo();
1215 if (MFI.getObjectAlign(FI) < Align(4))
1216 MFI.setObjectAlignment(FI, Align(4));
1217 Base = CurDAG->getTargetFrameIndex(
1218 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1219 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1220 return true;
1221 }
1222
1223 if (!CurDAG->isBaseWithConstantOffset(N))
1224 return false;
1225
1226 if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
1227 // If the RHS is + imm8 * scale, fold into addr mode.
1228 int RHSC;
1229 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
1230 Base = N.getOperand(0);
1231 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1232 // Make sure the offset is inside the object, or we might fail to
1233 // allocate an emergency spill slot. (An out-of-range access is UB, but
1234 // it could show up anyway.)
1235 MachineFrameInfo &MFI = MF->getFrameInfo();
1236 if (RHSC * 4 < MFI.getObjectSize(FI)) {
1237 // For LHS+RHS to result in an offset that's a multiple of 4 the object
1238 // indexed by the LHS must be 4-byte aligned.
1239 if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlign(FI) < Align(4))
1240 MFI.setObjectAlignment(FI, Align(4));
1241 if (MFI.getObjectAlign(FI) >= Align(4)) {
1242 Base = CurDAG->getTargetFrameIndex(
1243 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1244 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1245 return true;
1246 }
1247 }
1248 }
1249 }
1250
1251 return false;
1252 }
1253
1254 template <unsigned Shift>
SelectTAddrModeImm7(SDValue N,SDValue & Base,SDValue & OffImm)1255 bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base,
1256 SDValue &OffImm) {
1257 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
1258 int RHSC;
1259 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
1260 RHSC)) {
1261 Base = N.getOperand(0);
1262 if (N.getOpcode() == ISD::SUB)
1263 RHSC = -RHSC;
1264 OffImm =
1265 CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
1266 return true;
1267 }
1268 }
1269
1270 // Base only.
1271 Base = N;
1272 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1273 return true;
1274 }
1275
1276
1277 //===----------------------------------------------------------------------===//
1278 // Thumb 2 Addressing Modes
1279 //===----------------------------------------------------------------------===//
1280
1281
SelectT2AddrModeImm12(SDValue N,SDValue & Base,SDValue & OffImm)1282 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
1283 SDValue &Base, SDValue &OffImm) {
1284 // Match simple R + imm12 operands.
1285
1286 // Base only.
1287 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1288 !CurDAG->isBaseWithConstantOffset(N)) {
1289 if (N.getOpcode() == ISD::FrameIndex) {
1290 // Match frame index.
1291 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1292 Base = CurDAG->getTargetFrameIndex(
1293 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1294 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1295 return true;
1296 }
1297
1298 if (N.getOpcode() == ARMISD::Wrapper &&
1299 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1300 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1301 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1302 Base = N.getOperand(0);
1303 if (Base.getOpcode() == ISD::TargetConstantPool)
1304 return false; // We want to select t2LDRpci instead.
1305 } else
1306 Base = N;
1307 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1308 return true;
1309 }
1310
1311 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1312 if (SelectT2AddrModeImm8(N, Base, OffImm))
1313 // Let t2LDRi8 handle (R - imm8).
1314 return false;
1315
1316 int RHSC = (int)RHS->getZExtValue();
1317 if (N.getOpcode() == ISD::SUB)
1318 RHSC = -RHSC;
1319
1320 if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
1321 Base = N.getOperand(0);
1322 if (Base.getOpcode() == ISD::FrameIndex) {
1323 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1324 Base = CurDAG->getTargetFrameIndex(
1325 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1326 }
1327 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1328 return true;
1329 }
1330 }
1331
1332 // Base only.
1333 Base = N;
1334 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1335 return true;
1336 }
1337
1338 template <unsigned Shift>
SelectT2AddrModeImm8(SDValue N,SDValue & Base,SDValue & OffImm)1339 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base,
1340 SDValue &OffImm) {
1341 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
1342 int RHSC;
1343 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) {
1344 Base = N.getOperand(0);
1345 if (Base.getOpcode() == ISD::FrameIndex) {
1346 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1347 Base = CurDAG->getTargetFrameIndex(
1348 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1349 }
1350
1351 if (N.getOpcode() == ISD::SUB)
1352 RHSC = -RHSC;
1353 OffImm =
1354 CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
1355 return true;
1356 }
1357 }
1358
1359 // Base only.
1360 Base = N;
1361 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1362 return true;
1363 }
1364
SelectT2AddrModeImm8(SDValue N,SDValue & Base,SDValue & OffImm)1365 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1366 SDValue &Base, SDValue &OffImm) {
1367 // Match simple R - imm8 operands.
1368 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1369 !CurDAG->isBaseWithConstantOffset(N))
1370 return false;
1371
1372 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1373 int RHSC = (int)RHS->getSExtValue();
1374 if (N.getOpcode() == ISD::SUB)
1375 RHSC = -RHSC;
1376
1377 if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
1378 Base = N.getOperand(0);
1379 if (Base.getOpcode() == ISD::FrameIndex) {
1380 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1381 Base = CurDAG->getTargetFrameIndex(
1382 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1383 }
1384 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1385 return true;
1386 }
1387 }
1388
1389 return false;
1390 }
1391
SelectT2AddrModeImm8Offset(SDNode * Op,SDValue N,SDValue & OffImm)1392 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1393 SDValue &OffImm){
1394 unsigned Opcode = Op->getOpcode();
1395 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1396 ? cast<LoadSDNode>(Op)->getAddressingMode()
1397 : cast<StoreSDNode>(Op)->getAddressingMode();
1398 int RHSC;
1399 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1400 OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1401 ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
1402 : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1403 return true;
1404 }
1405
1406 return false;
1407 }
1408
1409 template <unsigned Shift>
SelectT2AddrModeImm7(SDValue N,SDValue & Base,SDValue & OffImm)1410 bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,
1411 SDValue &OffImm) {
1412 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
1413 int RHSC;
1414 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
1415 RHSC)) {
1416 Base = N.getOperand(0);
1417 if (Base.getOpcode() == ISD::FrameIndex) {
1418 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1419 Base = CurDAG->getTargetFrameIndex(
1420 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1421 }
1422
1423 if (N.getOpcode() == ISD::SUB)
1424 RHSC = -RHSC;
1425 OffImm =
1426 CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
1427 return true;
1428 }
1429 }
1430
1431 // Base only.
1432 Base = N;
1433 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1434 return true;
1435 }
1436
1437 template <unsigned Shift>
SelectT2AddrModeImm7Offset(SDNode * Op,SDValue N,SDValue & OffImm)1438 bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
1439 SDValue &OffImm) {
1440 return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
1441 }
1442
SelectT2AddrModeImm7Offset(SDNode * Op,SDValue N,SDValue & OffImm,unsigned Shift)1443 bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
1444 SDValue &OffImm,
1445 unsigned Shift) {
1446 unsigned Opcode = Op->getOpcode();
1447 ISD::MemIndexedMode AM;
1448 switch (Opcode) {
1449 case ISD::LOAD:
1450 AM = cast<LoadSDNode>(Op)->getAddressingMode();
1451 break;
1452 case ISD::STORE:
1453 AM = cast<StoreSDNode>(Op)->getAddressingMode();
1454 break;
1455 case ISD::MLOAD:
1456 AM = cast<MaskedLoadSDNode>(Op)->getAddressingMode();
1457 break;
1458 case ISD::MSTORE:
1459 AM = cast<MaskedStoreSDNode>(Op)->getAddressingMode();
1460 break;
1461 default:
1462 llvm_unreachable("Unexpected Opcode for Imm7Offset");
1463 }
1464
1465 int RHSC;
1466 // 7 bit constant, shifted by Shift.
1467 if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) {
1468 OffImm =
1469 ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1470 ? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32)
1471 : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N),
1472 MVT::i32);
1473 return true;
1474 }
1475 return false;
1476 }
1477
1478 template <int Min, int Max>
SelectImmediateInRange(SDValue N,SDValue & OffImm)1479 bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) {
1480 int Val;
1481 if (isScaledConstantInRange(N, 1, Min, Max, Val)) {
1482 OffImm = CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i32);
1483 return true;
1484 }
1485 return false;
1486 }
1487
SelectT2AddrModeSoReg(SDValue N,SDValue & Base,SDValue & OffReg,SDValue & ShImm)1488 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
1489 SDValue &Base,
1490 SDValue &OffReg, SDValue &ShImm) {
1491 // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
1492 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
1493 return false;
1494
1495 // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
1496 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1497 int RHSC = (int)RHS->getZExtValue();
1498 if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
1499 return false;
1500 else if (RHSC < 0 && RHSC >= -255) // 8 bits
1501 return false;
1502 }
1503
1504 // Look for (R + R) or (R + (R << [1,2,3])).
1505 unsigned ShAmt = 0;
1506 Base = N.getOperand(0);
1507 OffReg = N.getOperand(1);
1508
1509 // Swap if it is ((R << c) + R).
1510 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
1511 if (ShOpcVal != ARM_AM::lsl) {
1512 ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
1513 if (ShOpcVal == ARM_AM::lsl)
1514 std::swap(Base, OffReg);
1515 }
1516
1517 if (ShOpcVal == ARM_AM::lsl) {
1518 // Check to see if the RHS of the shift is a constant, if not, we can't fold
1519 // it.
1520 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
1521 ShAmt = Sh->getZExtValue();
1522 if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
1523 OffReg = OffReg.getOperand(0);
1524 else {
1525 ShAmt = 0;
1526 }
1527 }
1528 }
1529
1530 // If OffReg is a multiply-by-constant and it's profitable to extract a shift
1531 // and use it in a shifted operand do so.
1532 if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
1533 unsigned PowerOfTwo = 0;
1534 SDValue NewMulConst;
1535 if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
1536 HandleSDNode Handle(OffReg);
1537 replaceDAGValue(OffReg.getOperand(1), NewMulConst);
1538 OffReg = Handle.getValue();
1539 ShAmt = PowerOfTwo;
1540 }
1541 }
1542
1543 ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
1544
1545 return true;
1546 }
1547
SelectT2AddrModeExclusive(SDValue N,SDValue & Base,SDValue & OffImm)1548 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1549 SDValue &OffImm) {
1550 // This *must* succeed since it's used for the irreplaceable ldrex and strex
1551 // instructions.
1552 Base = N;
1553 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1554
1555 if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
1556 return true;
1557
1558 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1559 if (!RHS)
1560 return true;
1561
1562 uint32_t RHSC = (int)RHS->getZExtValue();
1563 if (RHSC > 1020 || RHSC % 4 != 0)
1564 return true;
1565
1566 Base = N.getOperand(0);
1567 if (Base.getOpcode() == ISD::FrameIndex) {
1568 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1569 Base = CurDAG->getTargetFrameIndex(
1570 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1571 }
1572
1573 OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
1574 return true;
1575 }
1576
1577 //===--------------------------------------------------------------------===//
1578
1579 /// getAL - Returns a ARMCC::AL immediate node.
getAL(SelectionDAG * CurDAG,const SDLoc & dl)1580 static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
1581 return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
1582 }
1583
transferMemOperands(SDNode * N,SDNode * Result)1584 void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
1585 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1586 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
1587 }
1588
tryARMIndexedLoad(SDNode * N)1589 bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
1590 LoadSDNode *LD = cast<LoadSDNode>(N);
1591 ISD::MemIndexedMode AM = LD->getAddressingMode();
1592 if (AM == ISD::UNINDEXED)
1593 return false;
1594
1595 EVT LoadedVT = LD->getMemoryVT();
1596 SDValue Offset, AMOpc;
1597 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1598 unsigned Opcode = 0;
1599 bool Match = false;
1600 if (LoadedVT == MVT::i32 && isPre &&
1601 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1602 Opcode = ARM::LDR_PRE_IMM;
1603 Match = true;
1604 } else if (LoadedVT == MVT::i32 && !isPre &&
1605 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1606 Opcode = ARM::LDR_POST_IMM;
1607 Match = true;
1608 } else if (LoadedVT == MVT::i32 &&
1609 SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1610 Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1611 Match = true;
1612
1613 } else if (LoadedVT == MVT::i16 &&
1614 SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1615 Match = true;
1616 Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1617 ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1618 : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
1619 } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
1620 if (LD->getExtensionType() == ISD::SEXTLOAD) {
1621 if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1622 Match = true;
1623 Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1624 }
1625 } else {
1626 if (isPre &&
1627 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1628 Match = true;
1629 Opcode = ARM::LDRB_PRE_IMM;
1630 } else if (!isPre &&
1631 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1632 Match = true;
1633 Opcode = ARM::LDRB_POST_IMM;
1634 } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1635 Match = true;
1636 Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1637 }
1638 }
1639 }
1640
1641 if (Match) {
1642 if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
1643 SDValue Chain = LD->getChain();
1644 SDValue Base = LD->getBasePtr();
1645 SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
1646 CurDAG->getRegister(0, MVT::i32), Chain };
1647 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1648 MVT::Other, Ops);
1649 transferMemOperands(N, New);
1650 ReplaceNode(N, New);
1651 return true;
1652 } else {
1653 SDValue Chain = LD->getChain();
1654 SDValue Base = LD->getBasePtr();
1655 SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
1656 CurDAG->getRegister(0, MVT::i32), Chain };
1657 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1658 MVT::Other, Ops);
1659 transferMemOperands(N, New);
1660 ReplaceNode(N, New);
1661 return true;
1662 }
1663 }
1664
1665 return false;
1666 }
1667
tryT1IndexedLoad(SDNode * N)1668 bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
1669 LoadSDNode *LD = cast<LoadSDNode>(N);
1670 EVT LoadedVT = LD->getMemoryVT();
1671 ISD::MemIndexedMode AM = LD->getAddressingMode();
1672 if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
1673 LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
1674 return false;
1675
1676 auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
1677 if (!COffs || COffs->getZExtValue() != 4)
1678 return false;
1679
1680 // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
1681 // The encoding of LDM is not how the rest of ISel expects a post-inc load to
1682 // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
1683 // ISel.
1684 SDValue Chain = LD->getChain();
1685 SDValue Base = LD->getBasePtr();
1686 SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
1687 CurDAG->getRegister(0, MVT::i32), Chain };
1688 SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
1689 MVT::i32, MVT::Other, Ops);
1690 transferMemOperands(N, New);
1691 ReplaceNode(N, New);
1692 return true;
1693 }
1694
tryT2IndexedLoad(SDNode * N)1695 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1696 LoadSDNode *LD = cast<LoadSDNode>(N);
1697 ISD::MemIndexedMode AM = LD->getAddressingMode();
1698 if (AM == ISD::UNINDEXED)
1699 return false;
1700
1701 EVT LoadedVT = LD->getMemoryVT();
1702 bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1703 SDValue Offset;
1704 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1705 unsigned Opcode = 0;
1706 bool Match = false;
1707 if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1708 switch (LoadedVT.getSimpleVT().SimpleTy) {
1709 case MVT::i32:
1710 Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1711 break;
1712 case MVT::i16:
1713 if (isSExtLd)
1714 Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1715 else
1716 Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1717 break;
1718 case MVT::i8:
1719 case MVT::i1:
1720 if (isSExtLd)
1721 Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1722 else
1723 Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1724 break;
1725 default:
1726 return false;
1727 }
1728 Match = true;
1729 }
1730
1731 if (Match) {
1732 SDValue Chain = LD->getChain();
1733 SDValue Base = LD->getBasePtr();
1734 SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1735 CurDAG->getRegister(0, MVT::i32), Chain };
1736 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1737 MVT::Other, Ops);
1738 transferMemOperands(N, New);
1739 ReplaceNode(N, New);
1740 return true;
1741 }
1742
1743 return false;
1744 }
1745
tryMVEIndexedLoad(SDNode * N)1746 bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
1747 EVT LoadedVT;
1748 unsigned Opcode = 0;
1749 bool isSExtLd, isPre;
1750 Align Alignment;
1751 ARMVCC::VPTCodes Pred;
1752 SDValue PredReg;
1753 SDValue Chain, Base, Offset;
1754
1755 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
1756 ISD::MemIndexedMode AM = LD->getAddressingMode();
1757 if (AM == ISD::UNINDEXED)
1758 return false;
1759 LoadedVT = LD->getMemoryVT();
1760 if (!LoadedVT.isVector())
1761 return false;
1762
1763 Chain = LD->getChain();
1764 Base = LD->getBasePtr();
1765 Offset = LD->getOffset();
1766 Alignment = LD->getAlign();
1767 isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1768 isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1769 Pred = ARMVCC::None;
1770 PredReg = CurDAG->getRegister(0, MVT::i32);
1771 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
1772 ISD::MemIndexedMode AM = LD->getAddressingMode();
1773 if (AM == ISD::UNINDEXED)
1774 return false;
1775 LoadedVT = LD->getMemoryVT();
1776 if (!LoadedVT.isVector())
1777 return false;
1778
1779 Chain = LD->getChain();
1780 Base = LD->getBasePtr();
1781 Offset = LD->getOffset();
1782 Alignment = LD->getAlign();
1783 isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1784 isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1785 Pred = ARMVCC::Then;
1786 PredReg = LD->getMask();
1787 } else
1788 llvm_unreachable("Expected a Load or a Masked Load!");
1789
1790 // We allow LE non-masked loads to change the type (for example use a vldrb.8
1791 // as opposed to a vldrw.32). This can allow extra addressing modes or
1792 // alignments for what is otherwise an equivalent instruction.
1793 bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N);
1794
1795 SDValue NewOffset;
1796 if (Alignment >= Align(2) && LoadedVT == MVT::v4i16 &&
1797 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) {
1798 if (isSExtLd)
1799 Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
1800 else
1801 Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
1802 } else if (LoadedVT == MVT::v8i8 &&
1803 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
1804 if (isSExtLd)
1805 Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
1806 else
1807 Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
1808 } else if (LoadedVT == MVT::v4i8 &&
1809 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
1810 if (isSExtLd)
1811 Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
1812 else
1813 Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
1814 } else if (Alignment >= Align(4) &&
1815 (CanChangeType || LoadedVT == MVT::v4i32 ||
1816 LoadedVT == MVT::v4f32) &&
1817 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2))
1818 Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
1819 else if (Alignment >= Align(2) &&
1820 (CanChangeType || LoadedVT == MVT::v8i16 ||
1821 LoadedVT == MVT::v8f16) &&
1822 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1))
1823 Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
1824 else if ((CanChangeType || LoadedVT == MVT::v16i8) &&
1825 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0))
1826 Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
1827 else
1828 return false;
1829
1830 SDValue Ops[] = {Base,
1831 NewOffset,
1832 CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32),
1833 PredReg,
1834 CurDAG->getRegister(0, MVT::i32), // tp_reg
1835 Chain};
1836 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
1837 N->getValueType(0), MVT::Other, Ops);
1838 transferMemOperands(N, New);
1839 ReplaceUses(SDValue(N, 0), SDValue(New, 1));
1840 ReplaceUses(SDValue(N, 1), SDValue(New, 0));
1841 ReplaceUses(SDValue(N, 2), SDValue(New, 2));
1842 CurDAG->RemoveDeadNode(N);
1843 return true;
1844 }
1845
1846 /// Form a GPRPair pseudo register from a pair of GPR regs.
createGPRPairNode(EVT VT,SDValue V0,SDValue V1)1847 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1848 SDLoc dl(V0.getNode());
1849 SDValue RegClass =
1850 CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1851 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1852 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1853 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1854 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1855 }
1856
1857 /// Form a D register from a pair of S registers.
createSRegPairNode(EVT VT,SDValue V0,SDValue V1)1858 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1859 SDLoc dl(V0.getNode());
1860 SDValue RegClass =
1861 CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1862 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1863 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1864 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1865 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1866 }
1867
1868 /// Form a quad register from a pair of D registers.
createDRegPairNode(EVT VT,SDValue V0,SDValue V1)1869 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1870 SDLoc dl(V0.getNode());
1871 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1872 MVT::i32);
1873 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1874 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1875 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1876 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1877 }
1878
1879 /// Form 4 consecutive D registers from a pair of Q registers.
createQRegPairNode(EVT VT,SDValue V0,SDValue V1)1880 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1881 SDLoc dl(V0.getNode());
1882 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1883 MVT::i32);
1884 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1885 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1886 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1887 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1888 }
1889
1890 /// Form 4 consecutive S registers.
createQuadSRegsNode(EVT VT,SDValue V0,SDValue V1,SDValue V2,SDValue V3)1891 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1892 SDValue V2, SDValue V3) {
1893 SDLoc dl(V0.getNode());
1894 SDValue RegClass =
1895 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1896 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1897 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1898 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1899 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1900 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1901 V2, SubReg2, V3, SubReg3 };
1902 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1903 }
1904
1905 /// Form 4 consecutive D registers.
createQuadDRegsNode(EVT VT,SDValue V0,SDValue V1,SDValue V2,SDValue V3)1906 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1907 SDValue V2, SDValue V3) {
1908 SDLoc dl(V0.getNode());
1909 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1910 MVT::i32);
1911 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1912 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1913 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1914 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1915 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1916 V2, SubReg2, V3, SubReg3 };
1917 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1918 }
1919
1920 /// Form 4 consecutive Q registers.
createQuadQRegsNode(EVT VT,SDValue V0,SDValue V1,SDValue V2,SDValue V3)1921 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1922 SDValue V2, SDValue V3) {
1923 SDLoc dl(V0.getNode());
1924 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1925 MVT::i32);
1926 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1927 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1928 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1929 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1930 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1931 V2, SubReg2, V3, SubReg3 };
1932 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1933 }
1934
1935 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1936 /// of a NEON VLD or VST instruction. The supported values depend on the
1937 /// number of registers being loaded.
GetVLDSTAlign(SDValue Align,const SDLoc & dl,unsigned NumVecs,bool is64BitVector)1938 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1939 unsigned NumVecs, bool is64BitVector) {
1940 unsigned NumRegs = NumVecs;
1941 if (!is64BitVector && NumVecs < 3)
1942 NumRegs *= 2;
1943
1944 unsigned Alignment = Align->getAsZExtVal();
1945 if (Alignment >= 32 && NumRegs == 4)
1946 Alignment = 32;
1947 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1948 Alignment = 16;
1949 else if (Alignment >= 8)
1950 Alignment = 8;
1951 else
1952 Alignment = 0;
1953
1954 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1955 }
1956
isVLDfixed(unsigned Opc)1957 static bool isVLDfixed(unsigned Opc)
1958 {
1959 switch (Opc) {
1960 default: return false;
1961 case ARM::VLD1d8wb_fixed : return true;
1962 case ARM::VLD1d16wb_fixed : return true;
1963 case ARM::VLD1d64Qwb_fixed : return true;
1964 case ARM::VLD1d32wb_fixed : return true;
1965 case ARM::VLD1d64wb_fixed : return true;
1966 case ARM::VLD1d8TPseudoWB_fixed : return true;
1967 case ARM::VLD1d16TPseudoWB_fixed : return true;
1968 case ARM::VLD1d32TPseudoWB_fixed : return true;
1969 case ARM::VLD1d64TPseudoWB_fixed : return true;
1970 case ARM::VLD1d8QPseudoWB_fixed : return true;
1971 case ARM::VLD1d16QPseudoWB_fixed : return true;
1972 case ARM::VLD1d32QPseudoWB_fixed : return true;
1973 case ARM::VLD1d64QPseudoWB_fixed : return true;
1974 case ARM::VLD1q8wb_fixed : return true;
1975 case ARM::VLD1q16wb_fixed : return true;
1976 case ARM::VLD1q32wb_fixed : return true;
1977 case ARM::VLD1q64wb_fixed : return true;
1978 case ARM::VLD1DUPd8wb_fixed : return true;
1979 case ARM::VLD1DUPd16wb_fixed : return true;
1980 case ARM::VLD1DUPd32wb_fixed : return true;
1981 case ARM::VLD1DUPq8wb_fixed : return true;
1982 case ARM::VLD1DUPq16wb_fixed : return true;
1983 case ARM::VLD1DUPq32wb_fixed : return true;
1984 case ARM::VLD2d8wb_fixed : return true;
1985 case ARM::VLD2d16wb_fixed : return true;
1986 case ARM::VLD2d32wb_fixed : return true;
1987 case ARM::VLD2q8PseudoWB_fixed : return true;
1988 case ARM::VLD2q16PseudoWB_fixed : return true;
1989 case ARM::VLD2q32PseudoWB_fixed : return true;
1990 case ARM::VLD2DUPd8wb_fixed : return true;
1991 case ARM::VLD2DUPd16wb_fixed : return true;
1992 case ARM::VLD2DUPd32wb_fixed : return true;
1993 case ARM::VLD2DUPq8OddPseudoWB_fixed: return true;
1994 case ARM::VLD2DUPq16OddPseudoWB_fixed: return true;
1995 case ARM::VLD2DUPq32OddPseudoWB_fixed: return true;
1996 }
1997 }
1998
isVSTfixed(unsigned Opc)1999 static bool isVSTfixed(unsigned Opc)
2000 {
2001 switch (Opc) {
2002 default: return false;
2003 case ARM::VST1d8wb_fixed : return true;
2004 case ARM::VST1d16wb_fixed : return true;
2005 case ARM::VST1d32wb_fixed : return true;
2006 case ARM::VST1d64wb_fixed : return true;
2007 case ARM::VST1q8wb_fixed : return true;
2008 case ARM::VST1q16wb_fixed : return true;
2009 case ARM::VST1q32wb_fixed : return true;
2010 case ARM::VST1q64wb_fixed : return true;
2011 case ARM::VST1d8TPseudoWB_fixed : return true;
2012 case ARM::VST1d16TPseudoWB_fixed : return true;
2013 case ARM::VST1d32TPseudoWB_fixed : return true;
2014 case ARM::VST1d64TPseudoWB_fixed : return true;
2015 case ARM::VST1d8QPseudoWB_fixed : return true;
2016 case ARM::VST1d16QPseudoWB_fixed : return true;
2017 case ARM::VST1d32QPseudoWB_fixed : return true;
2018 case ARM::VST1d64QPseudoWB_fixed : return true;
2019 case ARM::VST2d8wb_fixed : return true;
2020 case ARM::VST2d16wb_fixed : return true;
2021 case ARM::VST2d32wb_fixed : return true;
2022 case ARM::VST2q8PseudoWB_fixed : return true;
2023 case ARM::VST2q16PseudoWB_fixed : return true;
2024 case ARM::VST2q32PseudoWB_fixed : return true;
2025 }
2026 }
2027
2028 // Get the register stride update opcode of a VLD/VST instruction that
2029 // is otherwise equivalent to the given fixed stride updating instruction.
getVLDSTRegisterUpdateOpcode(unsigned Opc)2030 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
2031 assert((isVLDfixed(Opc) || isVSTfixed(Opc))
2032 && "Incorrect fixed stride updating instruction.");
2033 switch (Opc) {
2034 default: break;
2035 case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
2036 case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
2037 case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
2038 case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
2039 case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
2040 case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
2041 case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
2042 case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
2043 case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
2044 case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
2045 case ARM::VLD1d8TPseudoWB_fixed: return ARM::VLD1d8TPseudoWB_register;
2046 case ARM::VLD1d16TPseudoWB_fixed: return ARM::VLD1d16TPseudoWB_register;
2047 case ARM::VLD1d32TPseudoWB_fixed: return ARM::VLD1d32TPseudoWB_register;
2048 case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
2049 case ARM::VLD1d8QPseudoWB_fixed: return ARM::VLD1d8QPseudoWB_register;
2050 case ARM::VLD1d16QPseudoWB_fixed: return ARM::VLD1d16QPseudoWB_register;
2051 case ARM::VLD1d32QPseudoWB_fixed: return ARM::VLD1d32QPseudoWB_register;
2052 case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
2053 case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
2054 case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
2055 case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
2056 case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
2057 case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
2058 case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
2059 case ARM::VLD2DUPq8OddPseudoWB_fixed: return ARM::VLD2DUPq8OddPseudoWB_register;
2060 case ARM::VLD2DUPq16OddPseudoWB_fixed: return ARM::VLD2DUPq16OddPseudoWB_register;
2061 case ARM::VLD2DUPq32OddPseudoWB_fixed: return ARM::VLD2DUPq32OddPseudoWB_register;
2062
2063 case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
2064 case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
2065 case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
2066 case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
2067 case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
2068 case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
2069 case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
2070 case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
2071 case ARM::VST1d8TPseudoWB_fixed: return ARM::VST1d8TPseudoWB_register;
2072 case ARM::VST1d16TPseudoWB_fixed: return ARM::VST1d16TPseudoWB_register;
2073 case ARM::VST1d32TPseudoWB_fixed: return ARM::VST1d32TPseudoWB_register;
2074 case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
2075 case ARM::VST1d8QPseudoWB_fixed: return ARM::VST1d8QPseudoWB_register;
2076 case ARM::VST1d16QPseudoWB_fixed: return ARM::VST1d16QPseudoWB_register;
2077 case ARM::VST1d32QPseudoWB_fixed: return ARM::VST1d32QPseudoWB_register;
2078 case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
2079
2080 case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
2081 case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
2082 case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
2083 case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
2084 case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
2085 case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
2086
2087 case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
2088 case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
2089 case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
2090 case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
2091 case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
2092 case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
2093
2094 case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
2095 case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
2096 case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
2097 }
2098 return Opc; // If not one we handle, return it unchanged.
2099 }
2100
2101 /// Returns true if the given increment is a Constant known to be equal to the
2102 /// access size performed by a NEON load/store. This means the "[rN]!" form can
2103 /// be used.
isPerfectIncrement(SDValue Inc,EVT VecTy,unsigned NumVecs)2104 static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
2105 auto C = dyn_cast<ConstantSDNode>(Inc);
2106 return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
2107 }
2108
/// Select a NEON VLD1/2/3/4 node, replacing \p N with the chosen machine
/// node(s).
///
/// \param N          The load node. When \p isUpdating is false it is treated
///                   as an intrinsic whose address is operand 2; otherwise the
///                   address is operand 1 and the increment is the operand
///                   that follows it.
/// \param isUpdating Whether the node also produces an i32 result in addition
///                   to the loaded vectors and the chain.
/// \param NumVecs    Number of vectors loaded (1 to 4, asserted).
/// \param DOpcodes   Opcode table used for 64-bit vector types, indexed by the
///                   OpcodeIndex computed from the vector type below.
/// \param QOpcodes0  Opcode table used for the other vector types: the single
///                   instruction when NumVecs <= 2, or the first (even
///                   subregs) load of the two-instruction sequence otherwise.
/// \param QOpcodes1  Opcode table for the second (odd subregs) load of the
///                   two-instruction sequence.
///
/// If SelectAddrMode6 does not match the address operand the function returns
/// without touching \p N.
void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();
  // Replace the matched alignment with the value the instruction can encode.
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  // Index into the opcode tables, derived from the vector type. The same
  // indices are used for the double- and quad-register tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  // Type of the machine node's first result. A single vector keeps its own
  // type; multiple vectors are returned as one vector of i64 (three vectors
  // are rounded up to four elements, and the element count is doubled for
  // non-64-bit vectors) from which the individual vectors are extracted below.
  EVT ResTy;
  if (NumVecs == 1)
    ResTy = VT;
  else {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
  }
  // Result list: loaded value, optional i32 (updating forms only), chain.
  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SDNode *VLd;
  SmallVector<SDValue, 7> Ops;

  // Double registers and VLD1/VLD2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVLDfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
        // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
        // the operands if not such an opcode.
      } else if (!isVLDfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  } else {
    // Otherwise, quad registers are loaded with two separate instructions,
    // where one loads the even registers and the other loads the odd registers.
    EVT AddrTy = MemAddr.getValueType();

    // Load the even subregs.  This is always an updating load, so that it
    // provides the address to the second load for the odd subregs.
    SDValue ImplDef =
      SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                          ResTy, AddrTy, MVT::Other, OpsA);
    // The second load is chained after the first one.
    Chain = SDValue(VLdA, 2);

    // Load the odd subregs.
    Ops.push_back(SDValue(VLdA, 1));
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      assert(isa<ConstantSDNode>(Inc.getNode()) &&
             "only constant post-increment update allowed for VLD3/4");
      (void)Inc;
      Ops.push_back(Reg0);
    }
    // Result 0 of the first load is passed as an operand to the second load.
    Ops.push_back(SDValue(VLdA, 0));
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
  }

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});

  // A single vector needs no subregister extraction: the results of VLd line
  // up with those of N.
  if (NumVecs == 1) {
    ReplaceNode(N, VLd);
    return;
  }

  // Extract out the subregisters.
  SDValue SuperReg = SDValue(VLd, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                    ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  // N's results after the vectors map onto VLd's results after the
  // super-register: result 1, plus result 2 for updating forms.
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
  CurDAG->RemoveDeadNode(N);
}
2250
/// Select a NEON VST1/2/3/4 node, replacing \p N with the chosen machine
/// node(s).
///
/// \param N          The store node. When \p isUpdating is false it is treated
///                   as an intrinsic whose address is operand 2; otherwise the
///                   address is operand 1 and the increment is the operand
///                   that follows it. In both cases the first stored vector is
///                   operand 3.
/// \param isUpdating Whether the node also produces an i32 result in addition
///                   to the chain.
/// \param NumVecs    Number of vectors stored (1 to 4, asserted).
/// \param DOpcodes   Opcode table used for 64-bit vector types, indexed by the
///                   OpcodeIndex computed from the vector type below.
/// \param QOpcodes0  Opcode table used for the other vector types: the single
///                   instruction when NumVecs <= 2, or the first (even
///                   registers) store of the two-instruction sequence
///                   otherwise.
/// \param QOpcodes1  Opcode table for the second (odd registers) store of the
///                   two-instruction sequence.
///
/// If SelectAddrMode6 does not match the address operand the function returns
/// without touching \p N.
void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();
  // Replace the matched alignment with the value the instruction can encode.
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  // Index into the opcode tables, derived from the type of the first stored
  // vector. The same indices are used for the double- and quad-register
  // tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vst type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  // Result list: optional i32 (updating forms only), then the chain.
  std::vector<EVT> ResTys;
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SmallVector<SDValue, 7> Ops;

  // Double registers and VST1/VST2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    // Build the single source operand: the vector itself for one vector,
    // otherwise a REG_SEQUENCE combining all stored vectors.
    SDValue SrcReg;
    if (NumVecs == 1) {
      SrcReg = N->getOperand(Vec0Idx);
    } else if (is64BitVector) {
      // Form a REG_SEQUENCE to force register allocation.
      SDValue V0 = N->getOperand(Vec0Idx + 0);
      SDValue V1 = N->getOperand(Vec0Idx + 1);
      if (NumVecs == 2)
        SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
      else {
        SDValue V2 = N->getOperand(Vec0Idx + 2);
        // If it's a vst3, form a quad D-register and leave the last part as
        // an undef.
        SDValue V3 = (NumVecs == 3)
          ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
          : N->getOperand(Vec0Idx + 3);
        SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
      }
    } else {
      // Form a QQ register.
      SDValue Q0 = N->getOperand(Vec0Idx);
      SDValue Q1 = N->getOperand(Vec0Idx + 1);
      SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
    }

    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVSTfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
      }
      // VST1/VST2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      else if (!isVSTfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(SrcReg);
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

    // Transfer memoperands.
    CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});

    ReplaceNode(N, VSt);
    return;
  }

  // Otherwise, quad registers are stored with two separate instructions,
  // where one stores the even registers and the other stores the odd registers.

  // Form the QQQQ REG_SEQUENCE.
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  SDValue V2 = N->getOperand(Vec0Idx + 2);
  // For three vectors the fourth slot of the sequence is left undefined.
  SDValue V3 = (NumVecs == 3)
    ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
    : N->getOperand(Vec0Idx + 3);
  SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);

  // Store the even D registers.  This is always an updating store, so that it
  // provides the address to the second store for the odd subregs.
  const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
  SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                        MemAddr.getValueType(),
                                        MVT::Other, OpsA);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
  // The second store is chained after the first one.
  Chain = SDValue(VStA, 1);

  // Store the odd D registers.
  Ops.push_back(SDValue(VStA, 0));
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    assert(isa<ConstantSDNode>(Inc.getNode()) &&
           "only constant post-increment update allowed for VST3/4");
    (void)Inc;
    Ops.push_back(Reg0);
  }
  Ops.push_back(RegSeq);
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);
  SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
                                        Ops);
  // Both stores carry the memory operand of the original node.
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
  ReplaceNode(N, VStB);
}
2404
/// Select a NEON vector load/store-lane machine node for \p N, covering
/// \p NumVecs (2 to 4) vectors.
///
/// \param N          Node being selected. Operand 0 is the chain, operand
///                   AddrOpIdx the address, operand AddrOpIdx + 1 the
///                   increment (updating forms only), operands Vec0Idx onwards
///                   the NumVecs vectors, and operand Vec0Idx + NumVecs the
///                   constant lane number.
/// \param IsLoad     True for loads; loads additionally produce a
///                   super-register result that is split back into vectors.
/// \param isUpdating True when the node also produces an updated address.
/// \param NumVecs    Number of vectors accessed.
/// \param DOpcodes   Opcode table used when the vector type is 64 bits wide.
/// \param QOpcodes   Opcode table used for the remaining (wider) vector types.
///
/// Returns without selecting anything if SelectAddrMode6 rejects the address.
void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                                      unsigned NumVecs,
                                      const uint16_t *DOpcodes,
                                      const uint16_t *QOpcodes) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
                                   // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  // The lane number is the operand that follows the last vector operand.
  unsigned Lane = N->getConstantOperandVal(Vec0Idx + NumVecs);
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();

  // Recompute the alignment operand. It is left at 0 when NumVecs == 3.
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = Align->getAsZExtVal();
    // One element per vector is accessed, so this is the total access size.
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    // Never claim more alignment than the access size.
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    // Alignments below both 8 and the access size are dropped.
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment); // keeps only the lowest set bit
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  // Index into DOpcodes (64-bit vectors) or QOpcodes (other vectors), chosen
  // by element type.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld/vst lane type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
    // Quad-register operations:
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 0; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 1; break;
  }

  // Result types: [super-register (loads only)], [i32 (updating only)], chain.
  std::vector<EVT> ResTys;
  if (IsLoad) {
    // Three vectors use a four-element super-register; the element count is
    // doubled when the vectors are not 64 bits wide.
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
                                      MVT::i64, ResTyElts));
  }
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  // Operand order: address, alignment, [increment], super-register, lane,
  // predicate, predicate register, chain.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    // When isPerfectIncrement accepts the increment, register 0 is passed in
    // place of the increment value.
    bool IsImmUpdate =
        isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
    Ops.push_back(IsImmUpdate ? Reg0 : Inc);
  }

  // Combine the input vectors into a single super-register operand.
  SDValue SuperReg;
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  if (NumVecs == 2) {
    if (is64BitVector)
      SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
    else
      SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
  } else {
    SDValue V2 = N->getOperand(Vec0Idx + 2);
    // With three vectors the fourth slot is filled with an IMPLICIT_DEF.
    SDValue V3 = (NumVecs == 3)
      ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
      : N->getOperand(Vec0Idx + 3);
    if (is64BitVector)
      SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
    else
      SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
  }
  Ops.push_back(SuperReg);
  Ops.push_back(getI32Imm(Lane, dl));
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                                  QOpcodes[OpcodeIndex]);
  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
  // Stores have no vector results, so the new node replaces N directly.
  if (!IsLoad) {
    ReplaceNode(N, VLdLn);
    return;
  }

  // Extract the subregisters.
  SuperReg = SDValue(VLdLn, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                    ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  // Results NumVecs and (updating only) NumVecs + 1 of N map onto results 1
  // and 2 of the new node.
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
  CurDAG->RemoveDeadNode(N);
}
2533
2534 template <typename SDValueVector>
AddMVEPredicateToOps(SDValueVector & Ops,SDLoc Loc,SDValue PredicateMask)2535 void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2536 SDValue PredicateMask) {
2537 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
2538 Ops.push_back(PredicateMask);
2539 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2540 }
2541
2542 template <typename SDValueVector>
AddMVEPredicateToOps(SDValueVector & Ops,SDLoc Loc,SDValue PredicateMask,SDValue Inactive)2543 void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2544 SDValue PredicateMask,
2545 SDValue Inactive) {
2546 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
2547 Ops.push_back(PredicateMask);
2548 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2549 Ops.push_back(Inactive);
2550 }
2551
2552 template <typename SDValueVector>
AddEmptyMVEPredicateToOps(SDValueVector & Ops,SDLoc Loc)2553 void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) {
2554 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
2555 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2556 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2557 }
2558
2559 template <typename SDValueVector>
AddEmptyMVEPredicateToOps(SDValueVector & Ops,SDLoc Loc,EVT InactiveTy)2560 void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2561 EVT InactiveTy) {
2562 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
2563 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2564 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2565 Ops.push_back(SDValue(
2566 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, InactiveTy), 0));
2567 }
2568
SelectMVE_WB(SDNode * N,const uint16_t * Opcodes,bool Predicated)2569 void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes,
2570 bool Predicated) {
2571 SDLoc Loc(N);
2572 SmallVector<SDValue, 8> Ops;
2573
2574 uint16_t Opcode;
2575 switch (N->getValueType(1).getVectorElementType().getSizeInBits()) {
2576 case 32:
2577 Opcode = Opcodes[0];
2578 break;
2579 case 64:
2580 Opcode = Opcodes[1];
2581 break;
2582 default:
2583 llvm_unreachable("bad vector element size in SelectMVE_WB");
2584 }
2585
2586 Ops.push_back(N->getOperand(2)); // vector of base addresses
2587
2588 int32_t ImmValue = N->getConstantOperandVal(3);
2589 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate offset
2590
2591 if (Predicated)
2592 AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
2593 else
2594 AddEmptyMVEPredicateToOps(Ops, Loc);
2595
2596 Ops.push_back(N->getOperand(0)); // chain
2597
2598 SmallVector<EVT, 8> VTs;
2599 VTs.push_back(N->getValueType(1));
2600 VTs.push_back(N->getValueType(0));
2601 VTs.push_back(N->getValueType(2));
2602
2603 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), VTs, Ops);
2604 ReplaceUses(SDValue(N, 0), SDValue(New, 1));
2605 ReplaceUses(SDValue(N, 1), SDValue(New, 0));
2606 ReplaceUses(SDValue(N, 2), SDValue(New, 2));
2607 transferMemOperands(N, New);
2608 CurDAG->RemoveDeadNode(N);
2609 }
2610
SelectMVE_LongShift(SDNode * N,uint16_t Opcode,bool Immediate,bool HasSaturationOperand)2611 void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,
2612 bool Immediate,
2613 bool HasSaturationOperand) {
2614 SDLoc Loc(N);
2615 SmallVector<SDValue, 8> Ops;
2616
2617 // Two 32-bit halves of the value to be shifted
2618 Ops.push_back(N->getOperand(1));
2619 Ops.push_back(N->getOperand(2));
2620
2621 // The shift count
2622 if (Immediate) {
2623 int32_t ImmValue = N->getConstantOperandVal(3);
2624 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
2625 } else {
2626 Ops.push_back(N->getOperand(3));
2627 }
2628
2629 // The immediate saturation operand, if any
2630 if (HasSaturationOperand) {
2631 int32_t SatOp = N->getConstantOperandVal(4);
2632 int SatBit = (SatOp == 64 ? 0 : 1);
2633 Ops.push_back(getI32Imm(SatBit, Loc));
2634 }
2635
2636 // MVE scalar shifts are IT-predicable, so include the standard
2637 // predicate arguments.
2638 Ops.push_back(getAL(CurDAG, Loc));
2639 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2640
2641 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
2642 }
2643
SelectMVE_VADCSBC(SDNode * N,uint16_t OpcodeWithCarry,uint16_t OpcodeWithNoCarry,bool Add,bool Predicated)2644 void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
2645 uint16_t OpcodeWithNoCarry,
2646 bool Add, bool Predicated) {
2647 SDLoc Loc(N);
2648 SmallVector<SDValue, 8> Ops;
2649 uint16_t Opcode;
2650
2651 unsigned FirstInputOp = Predicated ? 2 : 1;
2652
2653 // Two input vectors and the input carry flag
2654 Ops.push_back(N->getOperand(FirstInputOp));
2655 Ops.push_back(N->getOperand(FirstInputOp + 1));
2656 SDValue CarryIn = N->getOperand(FirstInputOp + 2);
2657 ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(CarryIn);
2658 uint32_t CarryMask = 1 << 29;
2659 uint32_t CarryExpected = Add ? 0 : CarryMask;
2660 if (CarryInConstant &&
2661 (CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) {
2662 Opcode = OpcodeWithNoCarry;
2663 } else {
2664 Ops.push_back(CarryIn);
2665 Opcode = OpcodeWithCarry;
2666 }
2667
2668 if (Predicated)
2669 AddMVEPredicateToOps(Ops, Loc,
2670 N->getOperand(FirstInputOp + 3), // predicate
2671 N->getOperand(FirstInputOp - 1)); // inactive
2672 else
2673 AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));
2674
2675 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
2676 }
2677
SelectMVE_VSHLC(SDNode * N,bool Predicated)2678 void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) {
2679 SDLoc Loc(N);
2680 SmallVector<SDValue, 8> Ops;
2681
2682 // One vector input, followed by a 32-bit word of bits to shift in
2683 // and then an immediate shift count
2684 Ops.push_back(N->getOperand(1));
2685 Ops.push_back(N->getOperand(2));
2686 int32_t ImmValue = N->getConstantOperandVal(3);
2687 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
2688
2689 if (Predicated)
2690 AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
2691 else
2692 AddEmptyMVEPredicateToOps(Ops, Loc);
2693
2694 CurDAG->SelectNodeTo(N, ARM::MVE_VSHLC, N->getVTList(), ArrayRef(Ops));
2695 }
2696
SDValueToConstBool(SDValue SDVal)2697 static bool SDValueToConstBool(SDValue SDVal) {
2698 assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant");
2699 ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal);
2700 uint64_t Value = SDValConstant->getZExtValue();
2701 assert((Value == 0 || Value == 1) && "expected value 0 or 1");
2702 return Value;
2703 }
2704
SelectBaseMVE_VMLLDAV(SDNode * N,bool Predicated,const uint16_t * OpcodesS,const uint16_t * OpcodesU,size_t Stride,size_t TySize)2705 void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
2706 const uint16_t *OpcodesS,
2707 const uint16_t *OpcodesU,
2708 size_t Stride, size_t TySize) {
2709 assert(TySize < Stride && "Invalid TySize");
2710 bool IsUnsigned = SDValueToConstBool(N->getOperand(1));
2711 bool IsSub = SDValueToConstBool(N->getOperand(2));
2712 bool IsExchange = SDValueToConstBool(N->getOperand(3));
2713 if (IsUnsigned) {
2714 assert(!IsSub &&
2715 "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist");
2716 assert(!IsExchange &&
2717 "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist");
2718 }
2719
2720 auto OpIsZero = [N](size_t OpNo) {
2721 return isNullConstant(N->getOperand(OpNo));
2722 };
2723
2724 // If the input accumulator value is not zero, select an instruction with
2725 // accumulator, otherwise select an instruction without accumulator
2726 bool IsAccum = !(OpIsZero(4) && OpIsZero(5));
2727
2728 const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS;
2729 if (IsSub)
2730 Opcodes += 4 * Stride;
2731 if (IsExchange)
2732 Opcodes += 2 * Stride;
2733 if (IsAccum)
2734 Opcodes += Stride;
2735 uint16_t Opcode = Opcodes[TySize];
2736
2737 SDLoc Loc(N);
2738 SmallVector<SDValue, 8> Ops;
2739 // Push the accumulator operands, if they are used
2740 if (IsAccum) {
2741 Ops.push_back(N->getOperand(4));
2742 Ops.push_back(N->getOperand(5));
2743 }
2744 // Push the two vector operands
2745 Ops.push_back(N->getOperand(6));
2746 Ops.push_back(N->getOperand(7));
2747
2748 if (Predicated)
2749 AddMVEPredicateToOps(Ops, Loc, N->getOperand(8));
2750 else
2751 AddEmptyMVEPredicateToOps(Ops, Loc);
2752
2753 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
2754 }
2755
SelectMVE_VMLLDAV(SDNode * N,bool Predicated,const uint16_t * OpcodesS,const uint16_t * OpcodesU)2756 void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated,
2757 const uint16_t *OpcodesS,
2758 const uint16_t *OpcodesU) {
2759 EVT VecTy = N->getOperand(6).getValueType();
2760 size_t SizeIndex;
2761 switch (VecTy.getVectorElementType().getSizeInBits()) {
2762 case 16:
2763 SizeIndex = 0;
2764 break;
2765 case 32:
2766 SizeIndex = 1;
2767 break;
2768 default:
2769 llvm_unreachable("bad vector element size");
2770 }
2771
2772 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 2, SizeIndex);
2773 }
2774
SelectMVE_VRMLLDAVH(SDNode * N,bool Predicated,const uint16_t * OpcodesS,const uint16_t * OpcodesU)2775 void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated,
2776 const uint16_t *OpcodesS,
2777 const uint16_t *OpcodesU) {
2778 assert(
2779 N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() ==
2780 32 &&
2781 "bad vector element size");
2782 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 1, 0);
2783 }
2784
SelectMVE_VLD(SDNode * N,unsigned NumVecs,const uint16_t * const * Opcodes,bool HasWriteback)2785 void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs,
2786 const uint16_t *const *Opcodes,
2787 bool HasWriteback) {
2788 EVT VT = N->getValueType(0);
2789 SDLoc Loc(N);
2790
2791 const uint16_t *OurOpcodes;
2792 switch (VT.getVectorElementType().getSizeInBits()) {
2793 case 8:
2794 OurOpcodes = Opcodes[0];
2795 break;
2796 case 16:
2797 OurOpcodes = Opcodes[1];
2798 break;
2799 case 32:
2800 OurOpcodes = Opcodes[2];
2801 break;
2802 default:
2803 llvm_unreachable("bad vector element size in SelectMVE_VLD");
2804 }
2805
2806 EVT DataTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs * 2);
2807 SmallVector<EVT, 4> ResultTys = {DataTy, MVT::Other};
2808 unsigned PtrOperand = HasWriteback ? 1 : 2;
2809
2810 auto Data = SDValue(
2811 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, DataTy), 0);
2812 SDValue Chain = N->getOperand(0);
2813 // Add a MVE_VLDn instruction for each Vec, except the last
2814 for (unsigned Stage = 0; Stage < NumVecs - 1; ++Stage) {
2815 SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
2816 auto LoadInst =
2817 CurDAG->getMachineNode(OurOpcodes[Stage], Loc, ResultTys, Ops);
2818 Data = SDValue(LoadInst, 0);
2819 Chain = SDValue(LoadInst, 1);
2820 transferMemOperands(N, LoadInst);
2821 }
2822 // The last may need a writeback on it
2823 if (HasWriteback)
2824 ResultTys = {DataTy, MVT::i32, MVT::Other};
2825 SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
2826 auto LoadInst =
2827 CurDAG->getMachineNode(OurOpcodes[NumVecs - 1], Loc, ResultTys, Ops);
2828 transferMemOperands(N, LoadInst);
2829
2830 unsigned i;
2831 for (i = 0; i < NumVecs; i++)
2832 ReplaceUses(SDValue(N, i),
2833 CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT,
2834 SDValue(LoadInst, 0)));
2835 if (HasWriteback)
2836 ReplaceUses(SDValue(N, i++), SDValue(LoadInst, 1));
2837 ReplaceUses(SDValue(N, i), SDValue(LoadInst, HasWriteback ? 2 : 1));
2838 CurDAG->RemoveDeadNode(N);
2839 }
2840
SelectMVE_VxDUP(SDNode * N,const uint16_t * Opcodes,bool Wrapping,bool Predicated)2841 void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
2842 bool Wrapping, bool Predicated) {
2843 EVT VT = N->getValueType(0);
2844 SDLoc Loc(N);
2845
2846 uint16_t Opcode;
2847 switch (VT.getScalarSizeInBits()) {
2848 case 8:
2849 Opcode = Opcodes[0];
2850 break;
2851 case 16:
2852 Opcode = Opcodes[1];
2853 break;
2854 case 32:
2855 Opcode = Opcodes[2];
2856 break;
2857 default:
2858 llvm_unreachable("bad vector element size in SelectMVE_VxDUP");
2859 }
2860
2861 SmallVector<SDValue, 8> Ops;
2862 unsigned OpIdx = 1;
2863
2864 SDValue Inactive;
2865 if (Predicated)
2866 Inactive = N->getOperand(OpIdx++);
2867
2868 Ops.push_back(N->getOperand(OpIdx++)); // base
2869 if (Wrapping)
2870 Ops.push_back(N->getOperand(OpIdx++)); // limit
2871
2872 SDValue ImmOp = N->getOperand(OpIdx++); // step
2873 int ImmValue = ImmOp->getAsZExtVal();
2874 Ops.push_back(getI32Imm(ImmValue, Loc));
2875
2876 if (Predicated)
2877 AddMVEPredicateToOps(Ops, Loc, N->getOperand(OpIdx), Inactive);
2878 else
2879 AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));
2880
2881 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
2882 }
2883
SelectCDE_CXxD(SDNode * N,uint16_t Opcode,size_t NumExtraOps,bool HasAccum)2884 void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode,
2885 size_t NumExtraOps, bool HasAccum) {
2886 bool IsBigEndian = CurDAG->getDataLayout().isBigEndian();
2887 SDLoc Loc(N);
2888 SmallVector<SDValue, 8> Ops;
2889
2890 unsigned OpIdx = 1;
2891
2892 // Convert and append the immediate operand designating the coprocessor.
2893 SDValue ImmCorpoc = N->getOperand(OpIdx++);
2894 uint32_t ImmCoprocVal = ImmCorpoc->getAsZExtVal();
2895 Ops.push_back(getI32Imm(ImmCoprocVal, Loc));
2896
2897 // For accumulating variants copy the low and high order parts of the
2898 // accumulator into a register pair and add it to the operand vector.
2899 if (HasAccum) {
2900 SDValue AccLo = N->getOperand(OpIdx++);
2901 SDValue AccHi = N->getOperand(OpIdx++);
2902 if (IsBigEndian)
2903 std::swap(AccLo, AccHi);
2904 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, AccLo, AccHi), 0));
2905 }
2906
2907 // Copy extra operands as-is.
2908 for (size_t I = 0; I < NumExtraOps; I++)
2909 Ops.push_back(N->getOperand(OpIdx++));
2910
2911 // Convert and append the immediate operand
2912 SDValue Imm = N->getOperand(OpIdx);
2913 uint32_t ImmVal = Imm->getAsZExtVal();
2914 Ops.push_back(getI32Imm(ImmVal, Loc));
2915
2916 // Accumulating variants are IT-predicable, add predicate operands.
2917 if (HasAccum) {
2918 SDValue Pred = getAL(CurDAG, Loc);
2919 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2920 Ops.push_back(Pred);
2921 Ops.push_back(PredReg);
2922 }
2923
2924 // Create the CDE intruction
2925 SDNode *InstrNode = CurDAG->getMachineNode(Opcode, Loc, MVT::Untyped, Ops);
2926 SDValue ResultPair = SDValue(InstrNode, 0);
2927
2928 // The original intrinsic had two outputs, and the output of the dual-register
2929 // CDE instruction is a register pair. We need to extract the two subregisters
2930 // and replace all uses of the original outputs with the extracted
2931 // subregisters.
2932 uint16_t SubRegs[2] = {ARM::gsub_0, ARM::gsub_1};
2933 if (IsBigEndian)
2934 std::swap(SubRegs[0], SubRegs[1]);
2935
2936 for (size_t ResIdx = 0; ResIdx < 2; ResIdx++) {
2937 if (SDValue(N, ResIdx).use_empty())
2938 continue;
2939 SDValue SubReg = CurDAG->getTargetExtractSubreg(SubRegs[ResIdx], Loc,
2940 MVT::i32, ResultPair);
2941 ReplaceUses(SDValue(N, ResIdx), SubReg);
2942 }
2943
2944 CurDAG->RemoveDeadNode(N);
2945 }
2946
/// Select a NEON "vld-dup" node (as the unreachable message below names it)
/// loading \p NumVecs (1 to 4) vectors.
///
/// \param N           Node being selected. Operand 0 is the chain and operand
///                    AddrOpIdx (2 for intrinsics, 1 otherwise) the address.
///                    Updating forms read the increment from operand 2.
/// \param IsIntrinsic Selects which operand holds the address.
/// \param isUpdating  True when the node also produces an updated address.
/// \param NumVecs     Number of vectors loaded.
/// \param DOpcodes    Opcode table used when the vector type is 64 bits wide.
/// \param QOpcodes0   Opcode table for wider vectors: the only instruction when
///                    NumVecs == 1, otherwise the first of two instructions.
/// \param QOpcodes1   Opcode table for the final instruction of wider vectors
///                    when NumVecs > 1.
///
/// Returns without selecting anything if SelectAddrMode6 rejects the address.
void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
                                   bool isUpdating, unsigned NumVecs,
                                   const uint16_t *DOpcodes,
                                   const uint16_t *QOpcodes0,
                                   const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();

  // Recompute the alignment operand. It is left at 0 when NumVecs == 3.
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = Align->getAsZExtVal();
    // One element per vector is loaded, so this is the total access size.
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    // Never claim more alignment than the access size.
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    // Alignments below both 8 and the access size are dropped.
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment); // keeps only the lowest set bit
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  // Index into the opcode tables, chosen by element size.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld-dup type");
  case MVT::v8i8:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v4i16:
  case MVT::v8i16:
  case MVT::v4f16:
  case MVT::v8f16:
  case MVT::v4bf16:
  case MVT::v8bf16:
                  OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32:
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v1f64:
  case MVT::v1i64: OpcodeIndex = 3; break;
  }

  // Three vectors use a four-element super-register; the element count is
  // doubled when the vectors are not 64 bits wide.
  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
  if (!is64BitVector)
    ResTyElts *= 2;
  EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);

  // Result types: super-register, [i32 (updating only)], chain.
  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SmallVector<SDValue, 6> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  unsigned Opc = is64BitVector    ? DOpcodes[OpcodeIndex]
                 : (NumVecs == 1) ? QOpcodes0[OpcodeIndex]
                                  : QOpcodes1[OpcodeIndex];
  if (isUpdating) {
    SDValue Inc = N->getOperand(2);
    bool IsImmUpdate =
        isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
    if (IsImmUpdate) {
      // Opcodes for which isVLDfixed() holds take no increment operand at
      // all; the others get register 0 in its place.
      if (!isVLDfixed(Opc))
        Ops.push_back(Reg0);
    } else {
      // A non-matching increment needs the register-update form of the
      // opcode and the increment itself as an operand.
      if (isVLDfixed(Opc))
        Opc = getVLDSTRegisterUpdateOpcode(Opc);
      Ops.push_back(Inc);
    }
  }
  if (is64BitVector || NumVecs == 1) {
    // Double registers and VLD1 quad registers are directly supported.
  } else if (NumVecs == 2) {
    // A first load from QOpcodes0 is emitted and only its chain result is
    // threaded into the final instruction.
    const SDValue OpsA[] = {MemAddr, Align, Pred, Reg0, Chain};
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy,
                                          MVT::Other, OpsA);
    Chain = SDValue(VLdA, 1);
  } else {
    // A first load from QOpcodes0 starts from an IMPLICIT_DEF; both its data
    // and chain results feed the final instruction.
    SDValue ImplDef = SDValue(
        CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = {MemAddr, Align, ImplDef, Pred, Reg0, Chain};
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy,
                                          MVT::Other, OpsA);
    Ops.push_back(SDValue(VLdA, 0));
    Chain = SDValue(VLdA, 1);
  }

  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});

  // Extract the subregisters.
  if (NumVecs == 1) {
    // A single vector is the result itself; no subregister extract is needed.
    ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
  } else {
    SDValue SuperReg = SDValue(VLdDup, 0);
    static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
    unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
    for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
      ReplaceUses(SDValue(N, Vec),
                  CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
    }
  }
  // Results NumVecs and (updating only) NumVecs + 1 of N map onto results 1
  // and 2 of the new node.
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
  CurDAG->RemoveDeadNode(N);
}
3077
/// Try to select a pair of adjacent-lane INSERT_VECTOR_ELT nodes on a v8f16 or
/// v8i16 vector as a single 32-bit subregister insert.
///
/// \p N is the outer insert (into lane 2k+1); its vector operand must be a
/// single-use insert of the same type into lane 2k. Requires MVE integer
/// operations. Returns true if the uses of \p N were replaced, false if the
/// pattern did not match and \p N is left untouched.
bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) {
  if (!Subtarget->hasMVEIntegerOps())
    return false;

  SDLoc dl(N);

  // We are trying to use VMOV/VMOVX/VINS to more efficiently lower insert and
  // extracts of v8f16 and v8i16 vectors. Check that we have two adjacent
  // inserts of the correct type:
  SDValue Ins1 = SDValue(N, 0);
  SDValue Ins2 = N->getOperand(0);
  EVT VT = Ins1.getValueType();
  if (Ins2.getOpcode() != ISD::INSERT_VECTOR_ELT || !Ins2.hasOneUse() ||
      !isa<ConstantSDNode>(Ins1.getOperand(2)) ||
      !isa<ConstantSDNode>(Ins2.getOperand(2)) ||
      (VT != MVT::v8f16 && VT != MVT::v8i16) || (Ins2.getValueType() != VT))
    return false;

  // The inner insert must target an even lane and the outer insert the odd
  // lane directly above it, so that together they cover one 32-bit slot.
  unsigned Lane1 = Ins1.getConstantOperandVal(2);
  unsigned Lane2 = Ins2.getConstantOperandVal(2);
  if (Lane2 % 2 != 0 || Lane1 != Lane2 + 1)
    return false;

  // If the inserted values will be able to use T/B already, leave it to the
  // existing tablegen patterns. For example VCVTT/VCVTB.
  SDValue Val1 = Ins1.getOperand(1);
  SDValue Val2 = Ins2.getOperand(1);
  if (Val1.getOpcode() == ISD::FP_ROUND || Val2.getOpcode() == ISD::FP_ROUND)
    return false;

  // Check if the inserted values are both extracts.
  if ((Val1.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
       Val1.getOpcode() == ARMISD::VGETLANEu) &&
      (Val2.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
       Val2.getOpcode() == ARMISD::VGETLANEu) &&
      isa<ConstantSDNode>(Val1.getOperand(1)) &&
      isa<ConstantSDNode>(Val2.getOperand(1)) &&
      (Val1.getOperand(0).getValueType() == MVT::v8f16 ||
       Val1.getOperand(0).getValueType() == MVT::v8i16) &&
      (Val2.getOperand(0).getValueType() == MVT::v8f16 ||
       Val2.getOperand(0).getValueType() == MVT::v8i16)) {
    unsigned ExtractLane1 = Val1.getConstantOperandVal(1);
    unsigned ExtractLane2 = Val2.getConstantOperandVal(1);

    // If the two extracted lanes are from the same place and adjacent, this
    // simplifies into a f32 lane move.
    if (Val1.getOperand(0) == Val2.getOperand(0) && ExtractLane2 % 2 == 0 &&
        ExtractLane1 == ExtractLane2 + 1) {
      SDValue NewExt = CurDAG->getTargetExtractSubreg(
          ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val1.getOperand(0));
      SDValue NewIns = CurDAG->getTargetInsertSubreg(
          ARM::ssub_0 + Lane2 / 2, dl, VT, Ins2.getOperand(0),
          NewExt);
      ReplaceUses(Ins1, NewIns);
      return true;
    }

    // Else v8i16 pattern of an extract and an insert, with a optional vmovx for
    // extracting odd lanes.
    if (VT == MVT::v8i16 && Subtarget->hasFullFP16()) {
      // Fetch the 32-bit subregister containing each extracted lane.
      SDValue Inp1 = CurDAG->getTargetExtractSubreg(
          ARM::ssub_0 + ExtractLane1 / 2, dl, MVT::f32, Val1.getOperand(0));
      SDValue Inp2 = CurDAG->getTargetExtractSubreg(
          ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val2.getOperand(0));
      // Odd source lanes go through a VMOVH node first.
      if (ExtractLane1 % 2 != 0)
        Inp1 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp1), 0);
      if (ExtractLane2 % 2 != 0)
        Inp2 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp2), 0);
      // Combine the two halves with VINSH and insert the 32-bit result into
      // the destination slot. Note the insert is created with type v4f32.
      SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Inp2, Inp1);
      SDValue NewIns =
          CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
                                        Ins2.getOperand(0), SDValue(VINS, 0));
      ReplaceUses(Ins1, NewIns);
      return true;
    }
  }

  // The inserted values are not extracted - if they are f16 then insert them
  // directly using a VINS.
  if (VT == MVT::v8f16 && Subtarget->hasFullFP16()) {
    SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Val2, Val1);
    SDValue NewIns =
        CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
                                      Ins2.getOperand(0), SDValue(VINS, 0));
    ReplaceUses(Ins1, NewIns);
    return true;
  }

  return false;
}
3168
/// Try to replace \p N with an MVE fixed-point VCVT, using the constant
/// multiplier of \p FMul to determine the number of fractional bits.
///
/// \param N            Node to replace; its result type selects the 16-bit or
///                     32-bit opcode.
/// \param FMul         Multiply whose operand 0 is the vector value (looked
///                     through a [U|S]INT_TO_FP) and whose operand 1 is the
///                     constant scale factor.
/// \param IsUnsigned   Selects the unsigned opcode variants.
/// \param FixedToFloat True for fixed-to-float conversion, false for
///                     float-to-fixed.
/// \returns true if \p N was replaced, false if the pattern did not match.
bool ARMDAGToDAGISel::transformFixedFloatingPointConversion(SDNode *N,
                                                            SDNode *FMul,
                                                            bool IsUnsigned,
                                                            bool FixedToFloat) {
  auto Type = N->getValueType(0);
  unsigned ScalarBits = Type.getScalarSizeInBits();
  if (ScalarBits > 32)
    return false;

  SDNodeFlags FMulFlags = FMul->getFlags();
  // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
  // allowed in 16 bit unsigned floats
  if (ScalarBits == 16 && !FMulFlags.hasNoInfs() && IsUnsigned)
    return false;

  SDValue ImmNode = FMul->getOperand(1);
  SDValue VecVal = FMul->getOperand(0);
  // Look through an integer-to-float conversion to reach the source vector.
  if (VecVal->getOpcode() == ISD::UINT_TO_FP ||
      VecVal->getOpcode() == ISD::SINT_TO_FP)
    VecVal = VecVal->getOperand(0);

  // The source elements must be as wide as the result elements.
  if (VecVal.getValueType().getScalarSizeInBits() != ScalarBits)
    return false;

  // Look through a bitcast of the constant, provided the element width is
  // unchanged on both sides of it.
  if (ImmNode.getOpcode() == ISD::BITCAST) {
    if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
      return false;
    ImmNode = ImmNode.getOperand(0);
  }

  if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
    return false;

  // Decode the scale factor into an APFloat; other constant forms are not
  // handled.
  APFloat ImmAPF(0.0f);
  switch (ImmNode.getOpcode()) {
  case ARMISD::VMOVIMM:
  case ARMISD::VDUP: {
    if (!isa<ConstantSDNode>(ImmNode.getOperand(0)))
      return false;
    unsigned Imm = ImmNode.getConstantOperandVal(0);
    if (ImmNode.getOpcode() == ARMISD::VMOVIMM)
      Imm = ARM_AM::decodeVMOVModImm(Imm, ScalarBits);
    // Reinterpret the raw bits as a single- or half-precision float.
    ImmAPF =
        APFloat(ScalarBits == 32 ? APFloat::IEEEsingle() : APFloat::IEEEhalf(),
                APInt(ScalarBits, Imm));
    break;
  }
  case ARMISD::VMOVFPIMM: {
    ImmAPF = APFloat(ARM_AM::getFPImmFloat(ImmNode.getConstantOperandVal(0)));
    break;
  }
  default:
    return false;
  }

  // Where n is the number of fractional bits, multiplying by 2^n will convert
  // from float to fixed and multiplying by 2^-n will convert from fixed to
  // float. Taking log2 of the factor (after taking the inverse in the case of
  // fixed to float) will give n.
  APFloat ToConvert = ImmAPF;
  if (FixedToFloat) {
    if (!ImmAPF.getExactInverse(&ToConvert))
      return false;
  }
  // The factor must convert exactly to an integer that is a power of two.
  APSInt Converted(64, false);
  bool IsExact;
  ToConvert.convertToInteger(Converted, llvm::RoundingMode::NearestTiesToEven,
                             &IsExact);
  if (!IsExact || !Converted.isPowerOf2())
    return false;

  // Reject fractional-bit counts larger than the element width.
  unsigned FracBits = Converted.logBase2();
  if (FracBits > ScalarBits)
    return false;

  // Operands: source vector, fractional-bit count, then an empty predicate
  // with an undefined inactive value of the result type.
  SmallVector<SDValue, 3> Ops{
      VecVal, CurDAG->getConstant(FracBits, SDLoc(N), MVT::i32)};
  AddEmptyMVEPredicateToOps(Ops, SDLoc(N), Type);

  unsigned int Opcode;
  switch (ScalarBits) {
  case 16:
    if (FixedToFloat)
      Opcode = IsUnsigned ? ARM::MVE_VCVTf16u16_fix : ARM::MVE_VCVTf16s16_fix;
    else
      Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
    break;
  case 32:
    if (FixedToFloat)
      Opcode = IsUnsigned ? ARM::MVE_VCVTf32u32_fix : ARM::MVE_VCVTf32s32_fix;
    else
      Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
    break;
  default:
    llvm_unreachable("unexpected number of scalar bits");
    break;
  }

  ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), Type, Ops));
  return true;
}
3270
tryFP_TO_INT(SDNode * N,SDLoc dl)3271 bool ARMDAGToDAGISel::tryFP_TO_INT(SDNode *N, SDLoc dl) {
3272 // Transform a floating-point to fixed-point conversion to a VCVT
3273 if (!Subtarget->hasMVEFloatOps())
3274 return false;
3275 EVT Type = N->getValueType(0);
3276 if (!Type.isVector())
3277 return false;
3278 unsigned int ScalarBits = Type.getScalarSizeInBits();
3279
3280 bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT ||
3281 N->getOpcode() == ISD::FP_TO_UINT_SAT;
3282 SDNode *Node = N->getOperand(0).getNode();
3283
3284 // floating-point to fixed-point with one fractional bit gets turned into an
3285 // FP_TO_[U|S]INT(FADD (x, x)) rather than an FP_TO_[U|S]INT(FMUL (x, y))
3286 if (Node->getOpcode() == ISD::FADD) {
3287 if (Node->getOperand(0) != Node->getOperand(1))
3288 return false;
3289 SDNodeFlags Flags = Node->getFlags();
3290 // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
3291 // allowed in 16 bit unsigned floats
3292 if (ScalarBits == 16 && !Flags.hasNoInfs() && IsUnsigned)
3293 return false;
3294
3295 unsigned Opcode;
3296 switch (ScalarBits) {
3297 case 16:
3298 Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
3299 break;
3300 case 32:
3301 Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
3302 break;
3303 }
3304 SmallVector<SDValue, 3> Ops{Node->getOperand(0),
3305 CurDAG->getConstant(1, dl, MVT::i32)};
3306 AddEmptyMVEPredicateToOps(Ops, dl, Type);
3307
3308 ReplaceNode(N, CurDAG->getMachineNode(Opcode, dl, Type, Ops));
3309 return true;
3310 }
3311
3312 if (Node->getOpcode() != ISD::FMUL)
3313 return false;
3314
3315 return transformFixedFloatingPointConversion(N, Node, IsUnsigned, false);
3316 }
3317
tryFMULFixed(SDNode * N,SDLoc dl)3318 bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) {
3319 // Transform a fixed-point to floating-point conversion to a VCVT
3320 if (!Subtarget->hasMVEFloatOps())
3321 return false;
3322 auto Type = N->getValueType(0);
3323 if (!Type.isVector())
3324 return false;
3325
3326 auto LHS = N->getOperand(0);
3327 if (LHS.getOpcode() != ISD::SINT_TO_FP && LHS.getOpcode() != ISD::UINT_TO_FP)
3328 return false;
3329
3330 return transformFixedFloatingPointConversion(
3331 N, N, LHS.getOpcode() == ISD::UINT_TO_FP, true);
3332 }
3333
tryV6T2BitfieldExtractOp(SDNode * N,bool isSigned)3334 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
3335 if (!Subtarget->hasV6T2Ops())
3336 return false;
3337
3338 unsigned Opc = isSigned
3339 ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
3340 : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
3341 SDLoc dl(N);
3342
3343 // For unsigned extracts, check for a shift right and mask
3344 unsigned And_imm = 0;
3345 if (N->getOpcode() == ISD::AND) {
3346 if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
3347
3348 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
3349 if (And_imm & (And_imm + 1))
3350 return false;
3351
3352 unsigned Srl_imm = 0;
3353 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
3354 Srl_imm)) {
3355 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3356
3357 // Mask off the unnecessary bits of the AND immediate; normally
3358 // DAGCombine will do this, but that might not happen if
3359 // targetShrinkDemandedConstant chooses a different immediate.
3360 And_imm &= -1U >> Srl_imm;
3361
3362 // Note: The width operand is encoded as width-1.
3363 unsigned Width = llvm::countr_one(And_imm) - 1;
3364 unsigned LSB = Srl_imm;
3365
3366 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3367
3368 if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
3369 // It's cheaper to use a right shift to extract the top bits.
3370 if (Subtarget->isThumb()) {
3371 Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
3372 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3373 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3374 getAL(CurDAG, dl), Reg0, Reg0 };
3375 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3376 return true;
3377 }
3378
3379 // ARM models shift instructions as MOVsi with shifter operand.
3380 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
3381 SDValue ShOpc =
3382 CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
3383 MVT::i32);
3384 SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
3385 getAL(CurDAG, dl), Reg0, Reg0 };
3386 CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
3387 return true;
3388 }
3389
3390 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3391 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3392 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3393 CurDAG->getTargetConstant(Width, dl, MVT::i32),
3394 getAL(CurDAG, dl), Reg0 };
3395 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3396 return true;
3397 }
3398 }
3399 return false;
3400 }
3401
3402 // Otherwise, we're looking for a shift of a shift
3403 unsigned Shl_imm = 0;
3404 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
3405 assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
3406 unsigned Srl_imm = 0;
3407 if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
3408 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3409 // Note: The width operand is encoded as width-1.
3410 unsigned Width = 32 - Srl_imm - 1;
3411 int LSB = Srl_imm - Shl_imm;
3412 if (LSB < 0)
3413 return false;
3414 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3415 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3416 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3417 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3418 CurDAG->getTargetConstant(Width, dl, MVT::i32),
3419 getAL(CurDAG, dl), Reg0 };
3420 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3421 return true;
3422 }
3423 }
3424
3425 // Or we are looking for a shift of an and, with a mask operand
3426 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
3427 isShiftedMask_32(And_imm)) {
3428 unsigned Srl_imm = 0;
3429 unsigned LSB = llvm::countr_zero(And_imm);
3430 // Shift must be the same as the ands lsb
3431 if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
3432 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3433 unsigned MSB = llvm::Log2_32(And_imm);
3434 // Note: The width operand is encoded as width-1.
3435 unsigned Width = MSB - LSB;
3436 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3437 assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3438 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3439 CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
3440 CurDAG->getTargetConstant(Width, dl, MVT::i32),
3441 getAL(CurDAG, dl), Reg0 };
3442 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3443 return true;
3444 }
3445 }
3446
3447 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
3448 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
3449 unsigned LSB = 0;
3450 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
3451 !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
3452 return false;
3453
3454 if (LSB + Width > 32)
3455 return false;
3456
3457 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3458 assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
3459 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3460 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3461 CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
3462 getAL(CurDAG, dl), Reg0 };
3463 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3464 return true;
3465 }
3466
3467 return false;
3468 }
3469
3470 /// Target-specific DAG combining for ISD::SUB.
3471 /// Target-independent combining lowers SELECT_CC nodes of the form
3472 /// select_cc setg[ge] X, 0, X, -X
3473 /// select_cc setgt X, -1, X, -X
3474 /// select_cc setl[te] X, 0, -X, X
3475 /// select_cc setlt X, 1, -X, X
3476 /// which represent Integer ABS into:
3477 /// Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
3478 /// ARM instruction selection detects the latter and matches it to
3479 /// ARM::ABS or ARM::t2ABS machine node.
tryABSOp(SDNode * N)3480 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
3481 SDValue SUBSrc0 = N->getOperand(0);
3482 SDValue SUBSrc1 = N->getOperand(1);
3483 EVT VT = N->getValueType(0);
3484
3485 if (Subtarget->isThumb1Only())
3486 return false;
3487
3488 if (SUBSrc0.getOpcode() != ISD::XOR || SUBSrc1.getOpcode() != ISD::SRA)
3489 return false;
3490
3491 SDValue XORSrc0 = SUBSrc0.getOperand(0);
3492 SDValue XORSrc1 = SUBSrc0.getOperand(1);
3493 SDValue SRASrc0 = SUBSrc1.getOperand(0);
3494 SDValue SRASrc1 = SUBSrc1.getOperand(1);
3495 ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
3496 EVT XType = SRASrc0.getValueType();
3497 unsigned Size = XType.getSizeInBits() - 1;
3498
3499 if (XORSrc1 == SUBSrc1 && XORSrc0 == SRASrc0 && XType.isInteger() &&
3500 SRAConstant != nullptr && Size == SRAConstant->getZExtValue()) {
3501 unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
3502 CurDAG->SelectNodeTo(N, Opcode, VT, XORSrc0);
3503 return true;
3504 }
3505
3506 return false;
3507 }
3508
3509 /// We've got special pseudo-instructions for these
SelectCMP_SWAP(SDNode * N)3510 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
3511 unsigned Opcode;
3512 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
3513 if (MemTy == MVT::i8)
3514 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_8 : ARM::CMP_SWAP_8;
3515 else if (MemTy == MVT::i16)
3516 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_16 : ARM::CMP_SWAP_16;
3517 else if (MemTy == MVT::i32)
3518 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_32 : ARM::CMP_SWAP_32;
3519 else
3520 llvm_unreachable("Unknown AtomicCmpSwap type");
3521
3522 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
3523 N->getOperand(0)};
3524 SDNode *CmpSwap = CurDAG->getMachineNode(
3525 Opcode, SDLoc(N),
3526 CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
3527
3528 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
3529 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
3530
3531 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
3532 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
3533 CurDAG->RemoveDeadNode(N);
3534 }
3535
3536 static std::optional<std::pair<unsigned, unsigned>>
getContiguousRangeOfSetBits(const APInt & A)3537 getContiguousRangeOfSetBits(const APInt &A) {
3538 unsigned FirstOne = A.getBitWidth() - A.countl_zero() - 1;
3539 unsigned LastOne = A.countr_zero();
3540 if (A.popcount() != (FirstOne - LastOne + 1))
3541 return std::nullopt;
3542 return std::make_pair(FirstOne, LastOne);
3543 }
3544
SelectCMPZ(SDNode * N,bool & SwitchEQNEToPLMI)3545 void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
3546 assert(N->getOpcode() == ARMISD::CMPZ);
3547 SwitchEQNEToPLMI = false;
3548
3549 if (!Subtarget->isThumb())
3550 // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
3551 // LSR don't exist as standalone instructions - they need the barrel shifter.
3552 return;
3553
3554 // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
3555 SDValue And = N->getOperand(0);
3556 if (!And->hasOneUse())
3557 return;
3558
3559 SDValue Zero = N->getOperand(1);
3560 if (!isNullConstant(Zero) || And->getOpcode() != ISD::AND)
3561 return;
3562 SDValue X = And.getOperand(0);
3563 auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));
3564
3565 if (!C)
3566 return;
3567 auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
3568 if (!Range)
3569 return;
3570
3571 // There are several ways to lower this:
3572 SDNode *NewN;
3573 SDLoc dl(N);
3574
3575 auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
3576 if (Subtarget->isThumb2()) {
3577 Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
3578 SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
3579 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3580 CurDAG->getRegister(0, MVT::i32) };
3581 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3582 } else {
3583 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
3584 CurDAG->getTargetConstant(Imm, dl, MVT::i32),
3585 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
3586 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3587 }
3588 };
3589
3590 if (Range->second == 0) {
3591 // 1. Mask includes the LSB -> Simply shift the top N bits off
3592 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
3593 ReplaceNode(And.getNode(), NewN);
3594 } else if (Range->first == 31) {
3595 // 2. Mask includes the MSB -> Simply shift the bottom N bits off
3596 NewN = EmitShift(ARM::tLSRri, X, Range->second);
3597 ReplaceNode(And.getNode(), NewN);
3598 } else if (Range->first == Range->second) {
3599 // 3. Only one bit is set. We can shift this into the sign bit and use a
3600 // PL/MI comparison.
3601 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
3602 ReplaceNode(And.getNode(), NewN);
3603
3604 SwitchEQNEToPLMI = true;
3605 } else if (!Subtarget->hasV6T2Ops()) {
3606 // 4. Do a double shift to clear bottom and top bits, but only in
3607 // thumb-1 mode as in thumb-2 we can use UBFX.
3608 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
3609 NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
3610 Range->second + (31 - Range->first));
3611 ReplaceNode(And.getNode(), NewN);
3612 }
3613 }
3614
getVectorShuffleOpcode(EVT VT,unsigned Opc64[3],unsigned Opc128[3])3615 static unsigned getVectorShuffleOpcode(EVT VT, unsigned Opc64[3],
3616 unsigned Opc128[3]) {
3617 assert((VT.is64BitVector() || VT.is128BitVector()) &&
3618 "Unexpected vector shuffle length");
3619 switch (VT.getScalarSizeInBits()) {
3620 default:
3621 llvm_unreachable("Unexpected vector shuffle element size");
3622 case 8:
3623 return VT.is64BitVector() ? Opc64[0] : Opc128[0];
3624 case 16:
3625 return VT.is64BitVector() ? Opc64[1] : Opc128[1];
3626 case 32:
3627 return VT.is64BitVector() ? Opc64[2] : Opc128[2];
3628 }
3629 }
3630
Select(SDNode * N)3631 void ARMDAGToDAGISel::Select(SDNode *N) {
3632 SDLoc dl(N);
3633
3634 if (N->isMachineOpcode()) {
3635 N->setNodeId(-1);
3636 return; // Already selected.
3637 }
3638
3639 switch (N->getOpcode()) {
3640 default: break;
3641 case ISD::STORE: {
3642 // For Thumb1, match an sp-relative store in C++. This is a little
3643 // unfortunate, but I don't think I can make the chain check work
3644 // otherwise. (The chain of the store has to be the same as the chain
3645 // of the CopyFromReg, or else we can't replace the CopyFromReg with
3646 // a direct reference to "SP".)
3647 //
3648 // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
3649 // a different addressing mode from other four-byte stores.
3650 //
3651 // This pattern usually comes up with call arguments.
3652 StoreSDNode *ST = cast<StoreSDNode>(N);
3653 SDValue Ptr = ST->getBasePtr();
3654 if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
3655 int RHSC = 0;
3656 if (Ptr.getOpcode() == ISD::ADD &&
3657 isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))
3658 Ptr = Ptr.getOperand(0);
3659
3660 if (Ptr.getOpcode() == ISD::CopyFromReg &&
3661 cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
3662 Ptr.getOperand(0) == ST->getChain()) {
3663 SDValue Ops[] = {ST->getValue(),
3664 CurDAG->getRegister(ARM::SP, MVT::i32),
3665 CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
3666 getAL(CurDAG, dl),
3667 CurDAG->getRegister(0, MVT::i32),
3668 ST->getChain()};
3669 MachineSDNode *ResNode =
3670 CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
3671 MachineMemOperand *MemOp = ST->getMemOperand();
3672 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
3673 ReplaceNode(N, ResNode);
3674 return;
3675 }
3676 }
3677 break;
3678 }
3679 case ISD::WRITE_REGISTER:
3680 if (tryWriteRegister(N))
3681 return;
3682 break;
3683 case ISD::READ_REGISTER:
3684 if (tryReadRegister(N))
3685 return;
3686 break;
3687 case ISD::INLINEASM:
3688 case ISD::INLINEASM_BR:
3689 if (tryInlineAsm(N))
3690 return;
3691 break;
3692 case ISD::SUB:
3693 // Select special operations if SUB node forms integer ABS pattern
3694 if (tryABSOp(N))
3695 return;
3696 // Other cases are autogenerated.
3697 break;
3698 case ISD::Constant: {
3699 unsigned Val = N->getAsZExtVal();
3700 // If we can't materialize the constant we need to use a literal pool
3701 if (ConstantMaterializationCost(Val, Subtarget) > 2 &&
3702 !Subtarget->genExecuteOnly()) {
3703 SDValue CPIdx = CurDAG->getTargetConstantPool(
3704 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
3705 TLI->getPointerTy(CurDAG->getDataLayout()));
3706
3707 SDNode *ResNode;
3708 if (Subtarget->isThumb()) {
3709 SDValue Ops[] = {
3710 CPIdx,
3711 getAL(CurDAG, dl),
3712 CurDAG->getRegister(0, MVT::i32),
3713 CurDAG->getEntryNode()
3714 };
3715 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
3716 Ops);
3717 } else {
3718 SDValue Ops[] = {
3719 CPIdx,
3720 CurDAG->getTargetConstant(0, dl, MVT::i32),
3721 getAL(CurDAG, dl),
3722 CurDAG->getRegister(0, MVT::i32),
3723 CurDAG->getEntryNode()
3724 };
3725 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
3726 Ops);
3727 }
3728 // Annotate the Node with memory operand information so that MachineInstr
3729 // queries work properly. This e.g. gives the register allocation the
3730 // required information for rematerialization.
3731 MachineFunction& MF = CurDAG->getMachineFunction();
3732 MachineMemOperand *MemOp =
3733 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
3734 MachineMemOperand::MOLoad, 4, Align(4));
3735
3736 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
3737
3738 ReplaceNode(N, ResNode);
3739 return;
3740 }
3741
3742 // Other cases are autogenerated.
3743 break;
3744 }
3745 case ISD::FrameIndex: {
3746 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
3747 int FI = cast<FrameIndexSDNode>(N)->getIndex();
3748 SDValue TFI = CurDAG->getTargetFrameIndex(
3749 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
3750 if (Subtarget->isThumb1Only()) {
3751 // Set the alignment of the frame object to 4, to avoid having to generate
3752 // more than one ADD
3753 MachineFrameInfo &MFI = MF->getFrameInfo();
3754 if (MFI.getObjectAlign(FI) < Align(4))
3755 MFI.setObjectAlignment(FI, Align(4));
3756 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
3757 CurDAG->getTargetConstant(0, dl, MVT::i32));
3758 return;
3759 } else {
3760 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
3761 ARM::t2ADDri : ARM::ADDri);
3762 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
3763 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3764 CurDAG->getRegister(0, MVT::i32) };
3765 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3766 return;
3767 }
3768 }
3769 case ISD::INSERT_VECTOR_ELT: {
3770 if (tryInsertVectorElt(N))
3771 return;
3772 break;
3773 }
3774 case ISD::SRL:
3775 if (tryV6T2BitfieldExtractOp(N, false))
3776 return;
3777 break;
3778 case ISD::SIGN_EXTEND_INREG:
3779 case ISD::SRA:
3780 if (tryV6T2BitfieldExtractOp(N, true))
3781 return;
3782 break;
3783 case ISD::FP_TO_UINT:
3784 case ISD::FP_TO_SINT:
3785 case ISD::FP_TO_UINT_SAT:
3786 case ISD::FP_TO_SINT_SAT:
3787 if (tryFP_TO_INT(N, dl))
3788 return;
3789 break;
3790 case ISD::FMUL:
3791 if (tryFMULFixed(N, dl))
3792 return;
3793 break;
3794 case ISD::MUL:
3795 if (Subtarget->isThumb1Only())
3796 break;
3797 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
3798 unsigned RHSV = C->getZExtValue();
3799 if (!RHSV) break;
3800 if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
3801 unsigned ShImm = Log2_32(RHSV-1);
3802 if (ShImm >= 32)
3803 break;
3804 SDValue V = N->getOperand(0);
3805 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
3806 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
3807 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3808 if (Subtarget->isThumb()) {
3809 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3810 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
3811 return;
3812 } else {
3813 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3814 Reg0 };
3815 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
3816 return;
3817 }
3818 }
3819 if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
3820 unsigned ShImm = Log2_32(RHSV+1);
3821 if (ShImm >= 32)
3822 break;
3823 SDValue V = N->getOperand(0);
3824 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
3825 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
3826 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3827 if (Subtarget->isThumb()) {
3828 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3829 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
3830 return;
3831 } else {
3832 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3833 Reg0 };
3834 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
3835 return;
3836 }
3837 }
3838 }
3839 break;
3840 case ISD::AND: {
3841 // Check for unsigned bitfield extract
3842 if (tryV6T2BitfieldExtractOp(N, false))
3843 return;
3844
3845 // If an immediate is used in an AND node, it is possible that the immediate
3846 // can be more optimally materialized when negated. If this is the case we
3847 // can negate the immediate and use a BIC instead.
3848 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
3849 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
3850 uint32_t Imm = (uint32_t) N1C->getZExtValue();
3851
3852 // In Thumb2 mode, an AND can take a 12-bit immediate. If this
3853 // immediate can be negated and fit in the immediate operand of
3854 // a t2BIC, don't do any manual transform here as this can be
3855 // handled by the generic ISel machinery.
3856 bool PreferImmediateEncoding =
3857 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
3858 if (!PreferImmediateEncoding &&
3859 ConstantMaterializationCost(Imm, Subtarget) >
3860 ConstantMaterializationCost(~Imm, Subtarget)) {
3861 // The current immediate costs more to materialize than a negated
3862 // immediate, so negate the immediate and use a BIC.
3863 SDValue NewImm =
3864 CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
3865 // If the new constant didn't exist before, reposition it in the topological
3866 // ordering so it is just before N. Otherwise, don't touch its location.
3867 if (NewImm->getNodeId() == -1)
3868 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
3869
3870 if (!Subtarget->hasThumb2()) {
3871 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
3872 N->getOperand(0), NewImm, getAL(CurDAG, dl),
3873 CurDAG->getRegister(0, MVT::i32)};
3874 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
3875 return;
3876 } else {
3877 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
3878 CurDAG->getRegister(0, MVT::i32),
3879 CurDAG->getRegister(0, MVT::i32)};
3880 ReplaceNode(N,
3881 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
3882 return;
3883 }
3884 }
3885 }
3886
3887 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
3888 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
3889 // are entirely contributed by c2 and lower 16-bits are entirely contributed
3890 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
3891 // Select it to: "movt x, ((c1 & 0xffff) >> 16)
3892 EVT VT = N->getValueType(0);
3893 if (VT != MVT::i32)
3894 break;
3895 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
3896 ? ARM::t2MOVTi16
3897 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
3898 if (!Opc)
3899 break;
3900 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3901 N1C = dyn_cast<ConstantSDNode>(N1);
3902 if (!N1C)
3903 break;
3904 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
3905 SDValue N2 = N0.getOperand(1);
3906 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
3907 if (!N2C)
3908 break;
3909 unsigned N1CVal = N1C->getZExtValue();
3910 unsigned N2CVal = N2C->getZExtValue();
3911 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
3912 (N1CVal & 0xffffU) == 0xffffU &&
3913 (N2CVal & 0xffffU) == 0x0U) {
3914 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
3915 dl, MVT::i32);
3916 SDValue Ops[] = { N0.getOperand(0), Imm16,
3917 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
3918 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
3919 return;
3920 }
3921 }
3922
3923 break;
3924 }
3925 case ARMISD::UMAAL: {
3926 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
3927 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
3928 N->getOperand(2), N->getOperand(3),
3929 getAL(CurDAG, dl),
3930 CurDAG->getRegister(0, MVT::i32) };
3931 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
3932 return;
3933 }
3934 case ARMISD::UMLAL:{
3935 if (Subtarget->isThumb()) {
3936 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3937 N->getOperand(3), getAL(CurDAG, dl),
3938 CurDAG->getRegister(0, MVT::i32)};
3939 ReplaceNode(
3940 N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
3941 return;
3942 }else{
3943 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3944 N->getOperand(3), getAL(CurDAG, dl),
3945 CurDAG->getRegister(0, MVT::i32),
3946 CurDAG->getRegister(0, MVT::i32) };
3947 ReplaceNode(N, CurDAG->getMachineNode(
3948 Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
3949 MVT::i32, MVT::i32, Ops));
3950 return;
3951 }
3952 }
3953 case ARMISD::SMLAL:{
3954 if (Subtarget->isThumb()) {
3955 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3956 N->getOperand(3), getAL(CurDAG, dl),
3957 CurDAG->getRegister(0, MVT::i32)};
3958 ReplaceNode(
3959 N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
3960 return;
3961 }else{
3962 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3963 N->getOperand(3), getAL(CurDAG, dl),
3964 CurDAG->getRegister(0, MVT::i32),
3965 CurDAG->getRegister(0, MVT::i32) };
3966 ReplaceNode(N, CurDAG->getMachineNode(
3967 Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
3968 MVT::i32, MVT::i32, Ops));
3969 return;
3970 }
3971 }
3972 case ARMISD::SUBE: {
3973 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
3974 break;
3975 // Look for a pattern to match SMMLS
3976 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
3977 if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
3978 N->getOperand(2).getOpcode() != ARMISD::SUBC ||
3979 !SDValue(N, 1).use_empty())
3980 break;
3981
3982 if (Subtarget->isThumb())
3983 assert(Subtarget->hasThumb2() &&
3984 "This pattern should not be generated for Thumb");
3985
3986 SDValue SmulLoHi = N->getOperand(1);
3987 SDValue Subc = N->getOperand(2);
3988 SDValue Zero = Subc.getOperand(0);
3989
3990 if (!isNullConstant(Zero) || Subc.getOperand(1) != SmulLoHi.getValue(0) ||
3991 N->getOperand(1) != SmulLoHi.getValue(1) ||
3992 N->getOperand(2) != Subc.getValue(1))
3993 break;
3994
3995 unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
3996 SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
3997 N->getOperand(0), getAL(CurDAG, dl),
3998 CurDAG->getRegister(0, MVT::i32) };
3999 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
4000 return;
4001 }
4002 case ISD::LOAD: {
4003 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
4004 return;
4005 if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
4006 if (tryT2IndexedLoad(N))
4007 return;
4008 } else if (Subtarget->isThumb()) {
4009 if (tryT1IndexedLoad(N))
4010 return;
4011 } else if (tryARMIndexedLoad(N))
4012 return;
4013 // Other cases are autogenerated.
4014 break;
4015 }
4016 case ISD::MLOAD:
4017 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
4018 return;
4019 // Other cases are autogenerated.
4020 break;
4021 case ARMISD::WLSSETUP: {
4022 SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopSetup, dl, MVT::i32,
4023 N->getOperand(0));
4024 ReplaceUses(N, New);
4025 CurDAG->RemoveDeadNode(N);
4026 return;
4027 }
4028 case ARMISD::WLS: {
4029 SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other,
4030 N->getOperand(1), N->getOperand(2),
4031 N->getOperand(0));
4032 ReplaceUses(N, New);
4033 CurDAG->RemoveDeadNode(N);
4034 return;
4035 }
4036 case ARMISD::LE: {
4037 SDValue Ops[] = { N->getOperand(1),
4038 N->getOperand(2),
4039 N->getOperand(0) };
4040 unsigned Opc = ARM::t2LoopEnd;
4041 SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
4042 ReplaceUses(N, New);
4043 CurDAG->RemoveDeadNode(N);
4044 return;
4045 }
4046 case ARMISD::LDRD: {
4047 if (Subtarget->isThumb2())
4048 break; // TableGen handles isel in this case.
4049 SDValue Base, RegOffset, ImmOffset;
4050 const SDValue &Chain = N->getOperand(0);
4051 const SDValue &Addr = N->getOperand(1);
4052 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
4053 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
4054 // The register-offset variant of LDRD mandates that the register
4055 // allocated to RegOffset is not reused in any of the remaining operands.
4056 // This restriction is currently not enforced. Therefore emitting this
4057 // variant is explicitly avoided.
4058 Base = Addr;
4059 RegOffset = CurDAG->getRegister(0, MVT::i32);
4060 }
4061 SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain};
4062 SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl,
4063 {MVT::Untyped, MVT::Other}, Ops);
4064 SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
4065 SDValue(New, 0));
4066 SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
4067 SDValue(New, 0));
4068 transferMemOperands(N, New);
4069 ReplaceUses(SDValue(N, 0), Lo);
4070 ReplaceUses(SDValue(N, 1), Hi);
4071 ReplaceUses(SDValue(N, 2), SDValue(New, 1));
4072 CurDAG->RemoveDeadNode(N);
4073 return;
4074 }
4075 case ARMISD::STRD: {
4076 if (Subtarget->isThumb2())
4077 break; // TableGen handles isel in this case.
4078 SDValue Base, RegOffset, ImmOffset;
4079 const SDValue &Chain = N->getOperand(0);
4080 const SDValue &Addr = N->getOperand(3);
4081 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
4082 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
4083 // The register-offset variant of STRD mandates that the register
4084 // allocated to RegOffset is not reused in any of the remaining operands.
4085 // This restriction is currently not enforced. Therefore emitting this
4086 // variant is explicitly avoided.
4087 Base = Addr;
4088 RegOffset = CurDAG->getRegister(0, MVT::i32);
4089 }
4090 SDNode *RegPair =
4091 createGPRPairNode(MVT::Untyped, N->getOperand(1), N->getOperand(2));
4092 SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain};
4093 SDNode *New = CurDAG->getMachineNode(ARM::STOREDUAL, dl, MVT::Other, Ops);
4094 transferMemOperands(N, New);
4095 ReplaceUses(SDValue(N, 0), SDValue(New, 0));
4096 CurDAG->RemoveDeadNode(N);
4097 return;
4098 }
4099 case ARMISD::LOOP_DEC: {
4100 SDValue Ops[] = { N->getOperand(1),
4101 N->getOperand(2),
4102 N->getOperand(0) };
4103 SDNode *Dec =
4104 CurDAG->getMachineNode(ARM::t2LoopDec, dl,
4105 CurDAG->getVTList(MVT::i32, MVT::Other), Ops);
4106 ReplaceUses(N, Dec);
4107 CurDAG->RemoveDeadNode(N);
4108 return;
4109 }
4110 case ARMISD::BRCOND: {
4111 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4112 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
4113 // Pattern complexity = 6 cost = 1 size = 0
4114
4115 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4116 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
4117 // Pattern complexity = 6 cost = 1 size = 0
4118
4119 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4120 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
4121 // Pattern complexity = 6 cost = 1 size = 0
4122
4123 unsigned Opc = Subtarget->isThumb() ?
4124 ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
4125 SDValue Chain = N->getOperand(0);
4126 SDValue N1 = N->getOperand(1);
4127 SDValue N2 = N->getOperand(2);
4128 SDValue N3 = N->getOperand(3);
4129 SDValue InGlue = N->getOperand(4);
4130 assert(N1.getOpcode() == ISD::BasicBlock);
4131 assert(N2.getOpcode() == ISD::Constant);
4132 assert(N3.getOpcode() == ISD::Register);
4133
4134 unsigned CC = (unsigned)N2->getAsZExtVal();
4135
4136 if (InGlue.getOpcode() == ARMISD::CMPZ) {
4137 if (InGlue.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
4138 SDValue Int = InGlue.getOperand(0);
4139 uint64_t ID = Int->getConstantOperandVal(1);
4140
4141 // Handle low-overhead loops.
4142 if (ID == Intrinsic::loop_decrement_reg) {
4143 SDValue Elements = Int.getOperand(2);
4144 SDValue Size = CurDAG->getTargetConstant(Int.getConstantOperandVal(3),
4145 dl, MVT::i32);
4146
4147 SDValue Args[] = { Elements, Size, Int.getOperand(0) };
4148 SDNode *LoopDec =
4149 CurDAG->getMachineNode(ARM::t2LoopDec, dl,
4150 CurDAG->getVTList(MVT::i32, MVT::Other),
4151 Args);
4152 ReplaceUses(Int.getNode(), LoopDec);
4153
4154 SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
4155 SDNode *LoopEnd =
4156 CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);
4157
4158 ReplaceUses(N, LoopEnd);
4159 CurDAG->RemoveDeadNode(N);
4160 CurDAG->RemoveDeadNode(InGlue.getNode());
4161 CurDAG->RemoveDeadNode(Int.getNode());
4162 return;
4163 }
4164 }
4165
4166 bool SwitchEQNEToPLMI;
4167 SelectCMPZ(InGlue.getNode(), SwitchEQNEToPLMI);
4168 InGlue = N->getOperand(4);
4169
4170 if (SwitchEQNEToPLMI) {
4171 switch ((ARMCC::CondCodes)CC) {
4172 default: llvm_unreachable("CMPZ must be either NE or EQ!");
4173 case ARMCC::NE:
4174 CC = (unsigned)ARMCC::MI;
4175 break;
4176 case ARMCC::EQ:
4177 CC = (unsigned)ARMCC::PL;
4178 break;
4179 }
4180 }
4181 }
4182
4183 SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
4184 SDValue Ops[] = { N1, Tmp2, N3, Chain, InGlue };
4185 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
4186 MVT::Glue, Ops);
4187 Chain = SDValue(ResNode, 0);
4188 if (N->getNumValues() == 2) {
4189 InGlue = SDValue(ResNode, 1);
4190 ReplaceUses(SDValue(N, 1), InGlue);
4191 }
4192 ReplaceUses(SDValue(N, 0),
4193 SDValue(Chain.getNode(), Chain.getResNo()));
4194 CurDAG->RemoveDeadNode(N);
4195 return;
4196 }
4197
4198 case ARMISD::CMPZ: {
4199 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
4200 // This allows us to avoid materializing the expensive negative constant.
4201 // The CMPZ #0 is useless and will be peepholed away but we need to keep it
4202 // for its glue output.
4203 SDValue X = N->getOperand(0);
4204 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
4205 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
4206 int64_t Addend = -C->getSExtValue();
4207
4208 SDNode *Add = nullptr;
4209 // ADDS can be better than CMN if the immediate fits in a
4210 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
4211 // Outside that range we can just use a CMN which is 32-bit but has a
4212 // 12-bit immediate range.
4213 if (Addend < 1<<8) {
4214 if (Subtarget->isThumb2()) {
4215 SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
4216 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
4217 CurDAG->getRegister(0, MVT::i32) };
4218 Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
4219 } else {
4220 unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
4221 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
4222 CurDAG->getTargetConstant(Addend, dl, MVT::i32),
4223 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
4224 Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
4225 }
4226 }
4227 if (Add) {
4228 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
4229 CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
4230 }
4231 }
4232 // Other cases are autogenerated.
4233 break;
4234 }
4235
4236 case ARMISD::CMOV: {
4237 SDValue InGlue = N->getOperand(4);
4238
4239 if (InGlue.getOpcode() == ARMISD::CMPZ) {
4240 bool SwitchEQNEToPLMI;
4241 SelectCMPZ(InGlue.getNode(), SwitchEQNEToPLMI);
4242
4243 if (SwitchEQNEToPLMI) {
4244 SDValue ARMcc = N->getOperand(2);
4245 ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc->getAsZExtVal();
4246
4247 switch (CC) {
4248 default: llvm_unreachable("CMPZ must be either NE or EQ!");
4249 case ARMCC::NE:
4250 CC = ARMCC::MI;
4251 break;
4252 case ARMCC::EQ:
4253 CC = ARMCC::PL;
4254 break;
4255 }
4256 SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
4257 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
4258 N->getOperand(3), N->getOperand(4)};
4259 CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
4260 }
4261
4262 }
4263 // Other cases are autogenerated.
4264 break;
4265 }
4266 case ARMISD::VZIP: {
4267 EVT VT = N->getValueType(0);
4268 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
4269 unsigned Opc64[] = {ARM::VZIPd8, ARM::VZIPd16, ARM::VTRNd32};
4270 unsigned Opc128[] = {ARM::VZIPq8, ARM::VZIPq16, ARM::VZIPq32};
4271 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4272 SDValue Pred = getAL(CurDAG, dl);
4273 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4274 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};
4275 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
4276 return;
4277 }
4278 case ARMISD::VUZP: {
4279 EVT VT = N->getValueType(0);
4280 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
4281 unsigned Opc64[] = {ARM::VUZPd8, ARM::VUZPd16, ARM::VTRNd32};
4282 unsigned Opc128[] = {ARM::VUZPq8, ARM::VUZPq16, ARM::VUZPq32};
4283 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4284 SDValue Pred = getAL(CurDAG, dl);
4285 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4286 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};
4287 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
4288 return;
4289 }
4290 case ARMISD::VTRN: {
4291 EVT VT = N->getValueType(0);
4292 unsigned Opc64[] = {ARM::VTRNd8, ARM::VTRNd16, ARM::VTRNd32};
4293 unsigned Opc128[] = {ARM::VTRNq8, ARM::VTRNq16, ARM::VTRNq32};
4294 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4295 SDValue Pred = getAL(CurDAG, dl);
4296 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4297 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};
4298 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
4299 return;
4300 }
4301 case ARMISD::BUILD_VECTOR: {
4302 EVT VecVT = N->getValueType(0);
4303 EVT EltVT = VecVT.getVectorElementType();
4304 unsigned NumElts = VecVT.getVectorNumElements();
4305 if (EltVT == MVT::f64) {
4306 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
4307 ReplaceNode(
4308 N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
4309 return;
4310 }
4311 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
4312 if (NumElts == 2) {
4313 ReplaceNode(
4314 N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
4315 return;
4316 }
4317 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
4318 ReplaceNode(N,
4319 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
4320 N->getOperand(2), N->getOperand(3)));
4321 return;
4322 }
4323
4324 case ARMISD::VLD1DUP: {
4325 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
4326 ARM::VLD1DUPd32 };
4327 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
4328 ARM::VLD1DUPq32 };
4329 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
4330 return;
4331 }
4332
4333 case ARMISD::VLD2DUP: {
4334 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
4335 ARM::VLD2DUPd32 };
4336 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
4337 return;
4338 }
4339
4340 case ARMISD::VLD3DUP: {
4341 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
4342 ARM::VLD3DUPd16Pseudo,
4343 ARM::VLD3DUPd32Pseudo };
4344 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
4345 return;
4346 }
4347
4348 case ARMISD::VLD4DUP: {
4349 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
4350 ARM::VLD4DUPd16Pseudo,
4351 ARM::VLD4DUPd32Pseudo };
4352 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
4353 return;
4354 }
4355
4356 case ARMISD::VLD1DUP_UPD: {
4357 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
4358 ARM::VLD1DUPd16wb_fixed,
4359 ARM::VLD1DUPd32wb_fixed };
4360 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
4361 ARM::VLD1DUPq16wb_fixed,
4362 ARM::VLD1DUPq32wb_fixed };
4363 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
4364 return;
4365 }
4366
4367 case ARMISD::VLD2DUP_UPD: {
4368 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8wb_fixed,
4369 ARM::VLD2DUPd16wb_fixed,
4370 ARM::VLD2DUPd32wb_fixed,
4371 ARM::VLD1q64wb_fixed };
4372 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
4373 ARM::VLD2DUPq16EvenPseudo,
4374 ARM::VLD2DUPq32EvenPseudo };
4375 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudoWB_fixed,
4376 ARM::VLD2DUPq16OddPseudoWB_fixed,
4377 ARM::VLD2DUPq32OddPseudoWB_fixed };
4378 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, DOpcodes, QOpcodes0, QOpcodes1);
4379 return;
4380 }
4381
4382 case ARMISD::VLD3DUP_UPD: {
4383 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
4384 ARM::VLD3DUPd16Pseudo_UPD,
4385 ARM::VLD3DUPd32Pseudo_UPD,
4386 ARM::VLD1d64TPseudoWB_fixed };
4387 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
4388 ARM::VLD3DUPq16EvenPseudo,
4389 ARM::VLD3DUPq32EvenPseudo };
4390 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo_UPD,
4391 ARM::VLD3DUPq16OddPseudo_UPD,
4392 ARM::VLD3DUPq32OddPseudo_UPD };
4393 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4394 return;
4395 }
4396
4397 case ARMISD::VLD4DUP_UPD: {
4398 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
4399 ARM::VLD4DUPd16Pseudo_UPD,
4400 ARM::VLD4DUPd32Pseudo_UPD,
4401 ARM::VLD1d64QPseudoWB_fixed };
4402 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
4403 ARM::VLD4DUPq16EvenPseudo,
4404 ARM::VLD4DUPq32EvenPseudo };
4405 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo_UPD,
4406 ARM::VLD4DUPq16OddPseudo_UPD,
4407 ARM::VLD4DUPq32OddPseudo_UPD };
4408 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4409 return;
4410 }
4411
4412 case ARMISD::VLD1_UPD: {
4413 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
4414 ARM::VLD1d16wb_fixed,
4415 ARM::VLD1d32wb_fixed,
4416 ARM::VLD1d64wb_fixed };
4417 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
4418 ARM::VLD1q16wb_fixed,
4419 ARM::VLD1q32wb_fixed,
4420 ARM::VLD1q64wb_fixed };
4421 SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
4422 return;
4423 }
4424
4425 case ARMISD::VLD2_UPD: {
4426 if (Subtarget->hasNEON()) {
4427 static const uint16_t DOpcodes[] = {
4428 ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed,
4429 ARM::VLD1q64wb_fixed};
4430 static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed,
4431 ARM::VLD2q16PseudoWB_fixed,
4432 ARM::VLD2q32PseudoWB_fixed};
4433 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
4434 } else {
4435 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8,
4436 ARM::MVE_VLD21_8_wb};
4437 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
4438 ARM::MVE_VLD21_16_wb};
4439 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
4440 ARM::MVE_VLD21_32_wb};
4441 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
4442 SelectMVE_VLD(N, 2, Opcodes, true);
4443 }
4444 return;
4445 }
4446
4447 case ARMISD::VLD3_UPD: {
4448 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
4449 ARM::VLD3d16Pseudo_UPD,
4450 ARM::VLD3d32Pseudo_UPD,
4451 ARM::VLD1d64TPseudoWB_fixed};
4452 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
4453 ARM::VLD3q16Pseudo_UPD,
4454 ARM::VLD3q32Pseudo_UPD };
4455 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
4456 ARM::VLD3q16oddPseudo_UPD,
4457 ARM::VLD3q32oddPseudo_UPD };
4458 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4459 return;
4460 }
4461
4462 case ARMISD::VLD4_UPD: {
4463 if (Subtarget->hasNEON()) {
4464 static const uint16_t DOpcodes[] = {
4465 ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD,
4466 ARM::VLD1d64QPseudoWB_fixed};
4467 static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD,
4468 ARM::VLD4q16Pseudo_UPD,
4469 ARM::VLD4q32Pseudo_UPD};
4470 static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD,
4471 ARM::VLD4q16oddPseudo_UPD,
4472 ARM::VLD4q32oddPseudo_UPD};
4473 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4474 } else {
4475 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
4476 ARM::MVE_VLD42_8,
4477 ARM::MVE_VLD43_8_wb};
4478 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
4479 ARM::MVE_VLD42_16,
4480 ARM::MVE_VLD43_16_wb};
4481 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
4482 ARM::MVE_VLD42_32,
4483 ARM::MVE_VLD43_32_wb};
4484 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
4485 SelectMVE_VLD(N, 4, Opcodes, true);
4486 }
4487 return;
4488 }
4489
4490 case ARMISD::VLD1x2_UPD: {
4491 if (Subtarget->hasNEON()) {
4492 static const uint16_t DOpcodes[] = {
4493 ARM::VLD1q8wb_fixed, ARM::VLD1q16wb_fixed, ARM::VLD1q32wb_fixed,
4494 ARM::VLD1q64wb_fixed};
4495 static const uint16_t QOpcodes[] = {
4496 ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
4497 ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
4498 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
4499 return;
4500 }
4501 break;
4502 }
4503
4504 case ARMISD::VLD1x3_UPD: {
4505 if (Subtarget->hasNEON()) {
4506 static const uint16_t DOpcodes[] = {
4507 ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d16TPseudoWB_fixed,
4508 ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d64TPseudoWB_fixed};
4509 static const uint16_t QOpcodes0[] = {
4510 ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1q16LowTPseudo_UPD,
4511 ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1q64LowTPseudo_UPD};
4512 static const uint16_t QOpcodes1[] = {
4513 ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1q16HighTPseudo_UPD,
4514 ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1q64HighTPseudo_UPD};
4515 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4516 return;
4517 }
4518 break;
4519 }
4520
4521 case ARMISD::VLD1x4_UPD: {
4522 if (Subtarget->hasNEON()) {
4523 static const uint16_t DOpcodes[] = {
4524 ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
4525 ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
4526 static const uint16_t QOpcodes0[] = {
4527 ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1q16LowQPseudo_UPD,
4528 ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1q64LowQPseudo_UPD};
4529 static const uint16_t QOpcodes1[] = {
4530 ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1q16HighQPseudo_UPD,
4531 ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1q64HighQPseudo_UPD};
4532 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4533 return;
4534 }
4535 break;
4536 }
4537
4538 case ARMISD::VLD2LN_UPD: {
4539 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
4540 ARM::VLD2LNd16Pseudo_UPD,
4541 ARM::VLD2LNd32Pseudo_UPD };
4542 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
4543 ARM::VLD2LNq32Pseudo_UPD };
4544 SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
4545 return;
4546 }
4547
4548 case ARMISD::VLD3LN_UPD: {
4549 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
4550 ARM::VLD3LNd16Pseudo_UPD,
4551 ARM::VLD3LNd32Pseudo_UPD };
4552 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
4553 ARM::VLD3LNq32Pseudo_UPD };
4554 SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
4555 return;
4556 }
4557
4558 case ARMISD::VLD4LN_UPD: {
4559 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
4560 ARM::VLD4LNd16Pseudo_UPD,
4561 ARM::VLD4LNd32Pseudo_UPD };
4562 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
4563 ARM::VLD4LNq32Pseudo_UPD };
4564 SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
4565 return;
4566 }
4567
4568 case ARMISD::VST1_UPD: {
4569 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
4570 ARM::VST1d16wb_fixed,
4571 ARM::VST1d32wb_fixed,
4572 ARM::VST1d64wb_fixed };
4573 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
4574 ARM::VST1q16wb_fixed,
4575 ARM::VST1q32wb_fixed,
4576 ARM::VST1q64wb_fixed };
4577 SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
4578 return;
4579 }
4580
4581 case ARMISD::VST2_UPD: {
4582 if (Subtarget->hasNEON()) {
4583 static const uint16_t DOpcodes[] = {
4584 ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed,
4585 ARM::VST1q64wb_fixed};
4586 static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed,
4587 ARM::VST2q16PseudoWB_fixed,
4588 ARM::VST2q32PseudoWB_fixed};
4589 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
4590 return;
4591 }
4592 break;
4593 }
4594
4595 case ARMISD::VST3_UPD: {
4596 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
4597 ARM::VST3d16Pseudo_UPD,
4598 ARM::VST3d32Pseudo_UPD,
4599 ARM::VST1d64TPseudoWB_fixed};
4600 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
4601 ARM::VST3q16Pseudo_UPD,
4602 ARM::VST3q32Pseudo_UPD };
4603 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
4604 ARM::VST3q16oddPseudo_UPD,
4605 ARM::VST3q32oddPseudo_UPD };
4606 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4607 return;
4608 }
4609
4610 case ARMISD::VST4_UPD: {
4611 if (Subtarget->hasNEON()) {
4612 static const uint16_t DOpcodes[] = {
4613 ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD,
4614 ARM::VST1d64QPseudoWB_fixed};
4615 static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD,
4616 ARM::VST4q16Pseudo_UPD,
4617 ARM::VST4q32Pseudo_UPD};
4618 static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD,
4619 ARM::VST4q16oddPseudo_UPD,
4620 ARM::VST4q32oddPseudo_UPD};
4621 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4622 return;
4623 }
4624 break;
4625 }
4626
4627 case ARMISD::VST1x2_UPD: {
4628 if (Subtarget->hasNEON()) {
4629 static const uint16_t DOpcodes[] = { ARM::VST1q8wb_fixed,
4630 ARM::VST1q16wb_fixed,
4631 ARM::VST1q32wb_fixed,
4632 ARM::VST1q64wb_fixed};
4633 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
4634 ARM::VST1d16QPseudoWB_fixed,
4635 ARM::VST1d32QPseudoWB_fixed,
4636 ARM::VST1d64QPseudoWB_fixed };
4637 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
4638 return;
4639 }
4640 break;
4641 }
4642
4643 case ARMISD::VST1x3_UPD: {
4644 if (Subtarget->hasNEON()) {
4645 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudoWB_fixed,
4646 ARM::VST1d16TPseudoWB_fixed,
4647 ARM::VST1d32TPseudoWB_fixed,
4648 ARM::VST1d64TPseudoWB_fixed };
4649 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
4650 ARM::VST1q16LowTPseudo_UPD,
4651 ARM::VST1q32LowTPseudo_UPD,
4652 ARM::VST1q64LowTPseudo_UPD };
4653 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo_UPD,
4654 ARM::VST1q16HighTPseudo_UPD,
4655 ARM::VST1q32HighTPseudo_UPD,
4656 ARM::VST1q64HighTPseudo_UPD };
4657 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4658 return;
4659 }
4660 break;
4661 }
4662
4663 case ARMISD::VST1x4_UPD: {
4664 if (Subtarget->hasNEON()) {
4665 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
4666 ARM::VST1d16QPseudoWB_fixed,
4667 ARM::VST1d32QPseudoWB_fixed,
4668 ARM::VST1d64QPseudoWB_fixed };
4669 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
4670 ARM::VST1q16LowQPseudo_UPD,
4671 ARM::VST1q32LowQPseudo_UPD,
4672 ARM::VST1q64LowQPseudo_UPD };
4673 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo_UPD,
4674 ARM::VST1q16HighQPseudo_UPD,
4675 ARM::VST1q32HighQPseudo_UPD,
4676 ARM::VST1q64HighQPseudo_UPD };
4677 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4678 return;
4679 }
4680 break;
4681 }
4682 case ARMISD::VST2LN_UPD: {
4683 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
4684 ARM::VST2LNd16Pseudo_UPD,
4685 ARM::VST2LNd32Pseudo_UPD };
4686 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
4687 ARM::VST2LNq32Pseudo_UPD };
4688 SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
4689 return;
4690 }
4691
4692 case ARMISD::VST3LN_UPD: {
4693 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
4694 ARM::VST3LNd16Pseudo_UPD,
4695 ARM::VST3LNd32Pseudo_UPD };
4696 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
4697 ARM::VST3LNq32Pseudo_UPD };
4698 SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
4699 return;
4700 }
4701
4702 case ARMISD::VST4LN_UPD: {
4703 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
4704 ARM::VST4LNd16Pseudo_UPD,
4705 ARM::VST4LNd32Pseudo_UPD };
4706 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
4707 ARM::VST4LNq32Pseudo_UPD };
4708 SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
4709 return;
4710 }
4711
4712 case ISD::INTRINSIC_VOID:
4713 case ISD::INTRINSIC_W_CHAIN: {
4714 unsigned IntNo = N->getConstantOperandVal(1);
4715 switch (IntNo) {
4716 default:
4717 break;
4718
4719 case Intrinsic::arm_mrrc:
4720 case Intrinsic::arm_mrrc2: {
4721 SDLoc dl(N);
4722 SDValue Chain = N->getOperand(0);
4723 unsigned Opc;
4724
4725 if (Subtarget->isThumb())
4726 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
4727 else
4728 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
4729
4730 SmallVector<SDValue, 5> Ops;
4731 Ops.push_back(getI32Imm(N->getConstantOperandVal(2), dl)); /* coproc */
4732 Ops.push_back(getI32Imm(N->getConstantOperandVal(3), dl)); /* opc */
4733 Ops.push_back(getI32Imm(N->getConstantOperandVal(4), dl)); /* CRm */
4734
4735 // The mrrc2 instruction in ARM doesn't allow predicates: the top 4 bits of the encoded
4736 // instruction will always be '1111'. It is possible in assembly language to specify
4737 // AL as a predicate to mrrc2, but that makes no difference to the encoded instruction.
4738 if (Opc != ARM::MRRC2) {
4739 Ops.push_back(getAL(CurDAG, dl));
4740 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4741 }
4742
4743 Ops.push_back(Chain);
4744
4745 // Writes to two registers.
4746 const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
4747
4748 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
4749 return;
4750 }
4751 case Intrinsic::arm_ldaexd:
4752 case Intrinsic::arm_ldrexd: {
4753 SDLoc dl(N);
4754 SDValue Chain = N->getOperand(0);
4755 SDValue MemAddr = N->getOperand(2);
4756 bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
4757
4758 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
4759 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
4760 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
4761
4762 // arm_ldrexd returns an i64 value in {i32, i32}
4763 std::vector<EVT> ResTys;
4764 if (isThumb) {
4765 ResTys.push_back(MVT::i32);
4766 ResTys.push_back(MVT::i32);
4767 } else
4768 ResTys.push_back(MVT::Untyped);
4769 ResTys.push_back(MVT::Other);
4770
4771 // Place arguments in the right order.
4772 SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
4773 CurDAG->getRegister(0, MVT::i32), Chain};
4774 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
4775 // Transfer memoperands.
4776 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
4777 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4778
4779 // Remap uses.
4780 SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
4781 if (!SDValue(N, 0).use_empty()) {
4782 SDValue Result;
4783 if (isThumb)
4784 Result = SDValue(Ld, 0);
4785 else {
4786 SDValue SubRegIdx =
4787 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
4788 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
4789 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
4790 Result = SDValue(ResNode,0);
4791 }
4792 ReplaceUses(SDValue(N, 0), Result);
4793 }
4794 if (!SDValue(N, 1).use_empty()) {
4795 SDValue Result;
4796 if (isThumb)
4797 Result = SDValue(Ld, 1);
4798 else {
4799 SDValue SubRegIdx =
4800 CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
4801 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
4802 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
4803 Result = SDValue(ResNode,0);
4804 }
4805 ReplaceUses(SDValue(N, 1), Result);
4806 }
4807 ReplaceUses(SDValue(N, 2), OutChain);
4808 CurDAG->RemoveDeadNode(N);
4809 return;
4810 }
4811 case Intrinsic::arm_stlexd:
4812 case Intrinsic::arm_strexd: {
4813 SDLoc dl(N);
4814 SDValue Chain = N->getOperand(0);
4815 SDValue Val0 = N->getOperand(2);
4816 SDValue Val1 = N->getOperand(3);
4817 SDValue MemAddr = N->getOperand(4);
4818
4819 // Store exclusive double returns an i32 value, which is the return status
4820 // of the issued store.
4821 const EVT ResTys[] = {MVT::i32, MVT::Other};
4822
4823 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
4824 // Place arguments in the right order.
4825 SmallVector<SDValue, 7> Ops;
4826 if (isThumb) {
4827 Ops.push_back(Val0);
4828 Ops.push_back(Val1);
4829 } else
4830 // arm_strexd uses GPRPair.
4831 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
4832 Ops.push_back(MemAddr);
4833 Ops.push_back(getAL(CurDAG, dl));
4834 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4835 Ops.push_back(Chain);
4836
4837 bool IsRelease = IntNo == Intrinsic::arm_stlexd;
4838 unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
4839 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
4840
4841 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
4842 // Transfer memoperands.
4843 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
4844 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4845
4846 ReplaceNode(N, St);
4847 return;
4848 }
4849
4850 case Intrinsic::arm_neon_vld1: {
4851 static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
4852 ARM::VLD1d32, ARM::VLD1d64 };
4853 static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
4854 ARM::VLD1q32, ARM::VLD1q64};
4855 SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
4856 return;
4857 }
4858
4859 case Intrinsic::arm_neon_vld1x2: {
4860 static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
4861 ARM::VLD1q32, ARM::VLD1q64 };
4862 static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
4863 ARM::VLD1d16QPseudo,
4864 ARM::VLD1d32QPseudo,
4865 ARM::VLD1d64QPseudo };
4866 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
4867 return;
4868 }
4869
4870 case Intrinsic::arm_neon_vld1x3: {
4871 static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
4872 ARM::VLD1d16TPseudo,
4873 ARM::VLD1d32TPseudo,
4874 ARM::VLD1d64TPseudo };
4875 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
4876 ARM::VLD1q16LowTPseudo_UPD,
4877 ARM::VLD1q32LowTPseudo_UPD,
4878 ARM::VLD1q64LowTPseudo_UPD };
4879 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
4880 ARM::VLD1q16HighTPseudo,
4881 ARM::VLD1q32HighTPseudo,
4882 ARM::VLD1q64HighTPseudo };
4883 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
4884 return;
4885 }
4886
4887 case Intrinsic::arm_neon_vld1x4: {
4888 static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
4889 ARM::VLD1d16QPseudo,
4890 ARM::VLD1d32QPseudo,
4891 ARM::VLD1d64QPseudo };
4892 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
4893 ARM::VLD1q16LowQPseudo_UPD,
4894 ARM::VLD1q32LowQPseudo_UPD,
4895 ARM::VLD1q64LowQPseudo_UPD };
4896 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
4897 ARM::VLD1q16HighQPseudo,
4898 ARM::VLD1q32HighQPseudo,
4899 ARM::VLD1q64HighQPseudo };
4900 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
4901 return;
4902 }
4903
4904 case Intrinsic::arm_neon_vld2: {
4905 static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
4906 ARM::VLD2d32, ARM::VLD1q64 };
4907 static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
4908 ARM::VLD2q32Pseudo };
4909 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
4910 return;
4911 }
4912
4913 case Intrinsic::arm_neon_vld3: {
4914 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
4915 ARM::VLD3d16Pseudo,
4916 ARM::VLD3d32Pseudo,
4917 ARM::VLD1d64TPseudo };
4918 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
4919 ARM::VLD3q16Pseudo_UPD,
4920 ARM::VLD3q32Pseudo_UPD };
4921 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
4922 ARM::VLD3q16oddPseudo,
4923 ARM::VLD3q32oddPseudo };
4924 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
4925 return;
4926 }
4927
4928 case Intrinsic::arm_neon_vld4: {
4929 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
4930 ARM::VLD4d16Pseudo,
4931 ARM::VLD4d32Pseudo,
4932 ARM::VLD1d64QPseudo };
4933 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
4934 ARM::VLD4q16Pseudo_UPD,
4935 ARM::VLD4q32Pseudo_UPD };
4936 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
4937 ARM::VLD4q16oddPseudo,
4938 ARM::VLD4q32oddPseudo };
4939 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
4940 return;
4941 }
4942
4943 case Intrinsic::arm_neon_vld2dup: {
4944 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
4945 ARM::VLD2DUPd32, ARM::VLD1q64 };
4946 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
4947 ARM::VLD2DUPq16EvenPseudo,
4948 ARM::VLD2DUPq32EvenPseudo };
4949 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
4950 ARM::VLD2DUPq16OddPseudo,
4951 ARM::VLD2DUPq32OddPseudo };
4952 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
4953 DOpcodes, QOpcodes0, QOpcodes1);
4954 return;
4955 }
4956
4957 case Intrinsic::arm_neon_vld3dup: {
4958 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
4959 ARM::VLD3DUPd16Pseudo,
4960 ARM::VLD3DUPd32Pseudo,
4961 ARM::VLD1d64TPseudo };
4962 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
4963 ARM::VLD3DUPq16EvenPseudo,
4964 ARM::VLD3DUPq32EvenPseudo };
4965 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
4966 ARM::VLD3DUPq16OddPseudo,
4967 ARM::VLD3DUPq32OddPseudo };
4968 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
4969 DOpcodes, QOpcodes0, QOpcodes1);
4970 return;
4971 }
4972
4973 case Intrinsic::arm_neon_vld4dup: {
4974 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
4975 ARM::VLD4DUPd16Pseudo,
4976 ARM::VLD4DUPd32Pseudo,
4977 ARM::VLD1d64QPseudo };
4978 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
4979 ARM::VLD4DUPq16EvenPseudo,
4980 ARM::VLD4DUPq32EvenPseudo };
4981 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
4982 ARM::VLD4DUPq16OddPseudo,
4983 ARM::VLD4DUPq32OddPseudo };
4984 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
4985 DOpcodes, QOpcodes0, QOpcodes1);
4986 return;
4987 }
4988
4989 case Intrinsic::arm_neon_vld2lane: {
4990 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
4991 ARM::VLD2LNd16Pseudo,
4992 ARM::VLD2LNd32Pseudo };
4993 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
4994 ARM::VLD2LNq32Pseudo };
4995 SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
4996 return;
4997 }
4998
4999 case Intrinsic::arm_neon_vld3lane: {
5000 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
5001 ARM::VLD3LNd16Pseudo,
5002 ARM::VLD3LNd32Pseudo };
5003 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
5004 ARM::VLD3LNq32Pseudo };
5005 SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
5006 return;
5007 }
5008
5009 case Intrinsic::arm_neon_vld4lane: {
5010 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
5011 ARM::VLD4LNd16Pseudo,
5012 ARM::VLD4LNd32Pseudo };
5013 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
5014 ARM::VLD4LNq32Pseudo };
5015 SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
5016 return;
5017 }
5018
5019 case Intrinsic::arm_neon_vst1: {
5020 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
5021 ARM::VST1d32, ARM::VST1d64 };
5022 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
5023 ARM::VST1q32, ARM::VST1q64 };
5024 SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
5025 return;
5026 }
5027
5028 case Intrinsic::arm_neon_vst1x2: {
5029 static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
5030 ARM::VST1q32, ARM::VST1q64 };
5031 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
5032 ARM::VST1d16QPseudo,
5033 ARM::VST1d32QPseudo,
5034 ARM::VST1d64QPseudo };
5035 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
5036 return;
5037 }
5038
5039 case Intrinsic::arm_neon_vst1x3: {
5040 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
5041 ARM::VST1d16TPseudo,
5042 ARM::VST1d32TPseudo,
5043 ARM::VST1d64TPseudo };
5044 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
5045 ARM::VST1q16LowTPseudo_UPD,
5046 ARM::VST1q32LowTPseudo_UPD,
5047 ARM::VST1q64LowTPseudo_UPD };
5048 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
5049 ARM::VST1q16HighTPseudo,
5050 ARM::VST1q32HighTPseudo,
5051 ARM::VST1q64HighTPseudo };
5052 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
5053 return;
5054 }
5055
5056 case Intrinsic::arm_neon_vst1x4: {
5057 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
5058 ARM::VST1d16QPseudo,
5059 ARM::VST1d32QPseudo,
5060 ARM::VST1d64QPseudo };
5061 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
5062 ARM::VST1q16LowQPseudo_UPD,
5063 ARM::VST1q32LowQPseudo_UPD,
5064 ARM::VST1q64LowQPseudo_UPD };
5065 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
5066 ARM::VST1q16HighQPseudo,
5067 ARM::VST1q32HighQPseudo,
5068 ARM::VST1q64HighQPseudo };
5069 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
5070 return;
5071 }
5072
5073 case Intrinsic::arm_neon_vst2: {
5074 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
5075 ARM::VST2d32, ARM::VST1q64 };
5076 static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
5077 ARM::VST2q32Pseudo };
5078 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
5079 return;
5080 }
5081
5082 case Intrinsic::arm_neon_vst3: {
5083 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
5084 ARM::VST3d16Pseudo,
5085 ARM::VST3d32Pseudo,
5086 ARM::VST1d64TPseudo };
5087 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
5088 ARM::VST3q16Pseudo_UPD,
5089 ARM::VST3q32Pseudo_UPD };
5090 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
5091 ARM::VST3q16oddPseudo,
5092 ARM::VST3q32oddPseudo };
5093 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
5094 return;
5095 }
5096
5097 case Intrinsic::arm_neon_vst4: {
5098 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
5099 ARM::VST4d16Pseudo,
5100 ARM::VST4d32Pseudo,
5101 ARM::VST1d64QPseudo };
5102 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
5103 ARM::VST4q16Pseudo_UPD,
5104 ARM::VST4q32Pseudo_UPD };
5105 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
5106 ARM::VST4q16oddPseudo,
5107 ARM::VST4q32oddPseudo };
5108 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
5109 return;
5110 }
5111
5112 case Intrinsic::arm_neon_vst2lane: {
5113 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
5114 ARM::VST2LNd16Pseudo,
5115 ARM::VST2LNd32Pseudo };
5116 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
5117 ARM::VST2LNq32Pseudo };
5118 SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
5119 return;
5120 }
5121
5122 case Intrinsic::arm_neon_vst3lane: {
5123 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
5124 ARM::VST3LNd16Pseudo,
5125 ARM::VST3LNd32Pseudo };
5126 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
5127 ARM::VST3LNq32Pseudo };
5128 SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
5129 return;
5130 }
5131
5132 case Intrinsic::arm_neon_vst4lane: {
5133 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
5134 ARM::VST4LNd16Pseudo,
5135 ARM::VST4LNd32Pseudo };
5136 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
5137 ARM::VST4LNq32Pseudo };
5138 SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
5139 return;
5140 }
5141
5142 case Intrinsic::arm_mve_vldr_gather_base_wb:
5143 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
5144 static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre,
5145 ARM::MVE_VLDRDU64_qi_pre};
5146 SelectMVE_WB(N, Opcodes,
5147 IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated);
5148 return;
5149 }
5150
5151 case Intrinsic::arm_mve_vld2q: {
5152 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8};
5153 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
5154 ARM::MVE_VLD21_16};
5155 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
5156 ARM::MVE_VLD21_32};
5157 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
5158 SelectMVE_VLD(N, 2, Opcodes, false);
5159 return;
5160 }
5161
5162 case Intrinsic::arm_mve_vld4q: {
5163 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
5164 ARM::MVE_VLD42_8, ARM::MVE_VLD43_8};
5165 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
5166 ARM::MVE_VLD42_16,
5167 ARM::MVE_VLD43_16};
5168 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
5169 ARM::MVE_VLD42_32,
5170 ARM::MVE_VLD43_32};
5171 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
5172 SelectMVE_VLD(N, 4, Opcodes, false);
5173 return;
5174 }
5175 }
5176 break;
5177 }
5178
5179 case ISD::INTRINSIC_WO_CHAIN: {
5180 unsigned IntNo = N->getConstantOperandVal(0);
5181 switch (IntNo) {
5182 default:
5183 break;
5184
5185 // Scalar f32 -> bf16
5186 case Intrinsic::arm_neon_vcvtbfp2bf: {
5187 SDLoc dl(N);
5188 const SDValue &Src = N->getOperand(1);
5189 llvm::EVT DestTy = N->getValueType(0);
5190 SDValue Pred = getAL(CurDAG, dl);
5191 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
5192 SDValue Ops[] = { Src, Src, Pred, Reg0 };
5193 CurDAG->SelectNodeTo(N, ARM::BF16_VCVTB, DestTy, Ops);
5194 return;
5195 }
5196
5197 // Vector v4f32 -> v4bf16
5198 case Intrinsic::arm_neon_vcvtfp2bf: {
5199 SDLoc dl(N);
5200 const SDValue &Src = N->getOperand(1);
5201 SDValue Pred = getAL(CurDAG, dl);
5202 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
5203 SDValue Ops[] = { Src, Pred, Reg0 };
5204 CurDAG->SelectNodeTo(N, ARM::BF16_VCVT, MVT::v4bf16, Ops);
5205 return;
5206 }
5207
5208 case Intrinsic::arm_mve_urshrl:
5209 SelectMVE_LongShift(N, ARM::MVE_URSHRL, true, false);
5210 return;
5211 case Intrinsic::arm_mve_uqshll:
5212 SelectMVE_LongShift(N, ARM::MVE_UQSHLL, true, false);
5213 return;
5214 case Intrinsic::arm_mve_srshrl:
5215 SelectMVE_LongShift(N, ARM::MVE_SRSHRL, true, false);
5216 return;
5217 case Intrinsic::arm_mve_sqshll:
5218 SelectMVE_LongShift(N, ARM::MVE_SQSHLL, true, false);
5219 return;
5220 case Intrinsic::arm_mve_uqrshll:
5221 SelectMVE_LongShift(N, ARM::MVE_UQRSHLL, false, true);
5222 return;
5223 case Intrinsic::arm_mve_sqrshrl:
5224 SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true);
5225 return;
5226
5227 case Intrinsic::arm_mve_vadc:
5228 case Intrinsic::arm_mve_vadc_predicated:
5229 SelectMVE_VADCSBC(N, ARM::MVE_VADC, ARM::MVE_VADCI, true,
5230 IntNo == Intrinsic::arm_mve_vadc_predicated);
5231 return;
5232 case Intrinsic::arm_mve_vsbc:
5233 case Intrinsic::arm_mve_vsbc_predicated:
5234 SelectMVE_VADCSBC(N, ARM::MVE_VSBC, ARM::MVE_VSBCI, true,
5235 IntNo == Intrinsic::arm_mve_vsbc_predicated);
5236 return;
5237 case Intrinsic::arm_mve_vshlc:
5238 case Intrinsic::arm_mve_vshlc_predicated:
5239 SelectMVE_VSHLC(N, IntNo == Intrinsic::arm_mve_vshlc_predicated);
5240 return;
5241
5242 case Intrinsic::arm_mve_vmlldava:
5243 case Intrinsic::arm_mve_vmlldava_predicated: {
5244 static const uint16_t OpcodesU[] = {
5245 ARM::MVE_VMLALDAVu16, ARM::MVE_VMLALDAVu32,
5246 ARM::MVE_VMLALDAVau16, ARM::MVE_VMLALDAVau32,
5247 };
5248 static const uint16_t OpcodesS[] = {
5249 ARM::MVE_VMLALDAVs16, ARM::MVE_VMLALDAVs32,
5250 ARM::MVE_VMLALDAVas16, ARM::MVE_VMLALDAVas32,
5251 ARM::MVE_VMLALDAVxs16, ARM::MVE_VMLALDAVxs32,
5252 ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32,
5253 ARM::MVE_VMLSLDAVs16, ARM::MVE_VMLSLDAVs32,
5254 ARM::MVE_VMLSLDAVas16, ARM::MVE_VMLSLDAVas32,
5255 ARM::MVE_VMLSLDAVxs16, ARM::MVE_VMLSLDAVxs32,
5256 ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32,
5257 };
5258 SelectMVE_VMLLDAV(N, IntNo == Intrinsic::arm_mve_vmlldava_predicated,
5259 OpcodesS, OpcodesU);
5260 return;
5261 }
5262
5263 case Intrinsic::arm_mve_vrmlldavha:
5264 case Intrinsic::arm_mve_vrmlldavha_predicated: {
5265 static const uint16_t OpcodesU[] = {
5266 ARM::MVE_VRMLALDAVHu32, ARM::MVE_VRMLALDAVHau32,
5267 };
5268 static const uint16_t OpcodesS[] = {
5269 ARM::MVE_VRMLALDAVHs32, ARM::MVE_VRMLALDAVHas32,
5270 ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32,
5271 ARM::MVE_VRMLSLDAVHs32, ARM::MVE_VRMLSLDAVHas32,
5272 ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32,
5273 };
5274 SelectMVE_VRMLLDAVH(N, IntNo == Intrinsic::arm_mve_vrmlldavha_predicated,
5275 OpcodesS, OpcodesU);
5276 return;
5277 }
5278
5279 case Intrinsic::arm_mve_vidup:
5280 case Intrinsic::arm_mve_vidup_predicated: {
5281 static const uint16_t Opcodes[] = {
5282 ARM::MVE_VIDUPu8, ARM::MVE_VIDUPu16, ARM::MVE_VIDUPu32,
5283 };
5284 SelectMVE_VxDUP(N, Opcodes, false,
5285 IntNo == Intrinsic::arm_mve_vidup_predicated);
5286 return;
5287 }
5288
5289 case Intrinsic::arm_mve_vddup:
5290 case Intrinsic::arm_mve_vddup_predicated: {
5291 static const uint16_t Opcodes[] = {
5292 ARM::MVE_VDDUPu8, ARM::MVE_VDDUPu16, ARM::MVE_VDDUPu32,
5293 };
5294 SelectMVE_VxDUP(N, Opcodes, false,
5295 IntNo == Intrinsic::arm_mve_vddup_predicated);
5296 return;
5297 }
5298
5299 case Intrinsic::arm_mve_viwdup:
5300 case Intrinsic::arm_mve_viwdup_predicated: {
5301 static const uint16_t Opcodes[] = {
5302 ARM::MVE_VIWDUPu8, ARM::MVE_VIWDUPu16, ARM::MVE_VIWDUPu32,
5303 };
5304 SelectMVE_VxDUP(N, Opcodes, true,
5305 IntNo == Intrinsic::arm_mve_viwdup_predicated);
5306 return;
5307 }
5308
5309 case Intrinsic::arm_mve_vdwdup:
5310 case Intrinsic::arm_mve_vdwdup_predicated: {
5311 static const uint16_t Opcodes[] = {
5312 ARM::MVE_VDWDUPu8, ARM::MVE_VDWDUPu16, ARM::MVE_VDWDUPu32,
5313 };
5314 SelectMVE_VxDUP(N, Opcodes, true,
5315 IntNo == Intrinsic::arm_mve_vdwdup_predicated);
5316 return;
5317 }
5318
5319 case Intrinsic::arm_cde_cx1d:
5320 case Intrinsic::arm_cde_cx1da:
5321 case Intrinsic::arm_cde_cx2d:
5322 case Intrinsic::arm_cde_cx2da:
5323 case Intrinsic::arm_cde_cx3d:
5324 case Intrinsic::arm_cde_cx3da: {
5325 bool HasAccum = IntNo == Intrinsic::arm_cde_cx1da ||
5326 IntNo == Intrinsic::arm_cde_cx2da ||
5327 IntNo == Intrinsic::arm_cde_cx3da;
5328 size_t NumExtraOps;
5329 uint16_t Opcode;
5330 switch (IntNo) {
5331 case Intrinsic::arm_cde_cx1d:
5332 case Intrinsic::arm_cde_cx1da:
5333 NumExtraOps = 0;
5334 Opcode = HasAccum ? ARM::CDE_CX1DA : ARM::CDE_CX1D;
5335 break;
5336 case Intrinsic::arm_cde_cx2d:
5337 case Intrinsic::arm_cde_cx2da:
5338 NumExtraOps = 1;
5339 Opcode = HasAccum ? ARM::CDE_CX2DA : ARM::CDE_CX2D;
5340 break;
5341 case Intrinsic::arm_cde_cx3d:
5342 case Intrinsic::arm_cde_cx3da:
5343 NumExtraOps = 2;
5344 Opcode = HasAccum ? ARM::CDE_CX3DA : ARM::CDE_CX3D;
5345 break;
5346 default:
5347 llvm_unreachable("Unexpected opcode");
5348 }
5349 SelectCDE_CXxD(N, Opcode, NumExtraOps, HasAccum);
5350 return;
5351 }
5352 }
5353 break;
5354 }
5355
5356 case ISD::ATOMIC_CMP_SWAP:
5357 SelectCMP_SWAP(N);
5358 return;
5359 }
5360
5361 SelectCode(N);
5362 }
5363
5364 // Inspect a register string of the form
5365 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
5366 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
5367 // and obtain the integer operands from them, adding these operands to the
5368 // provided vector.
getIntOperandsFromRegisterString(StringRef RegString,SelectionDAG * CurDAG,const SDLoc & DL,std::vector<SDValue> & Ops)5369 static void getIntOperandsFromRegisterString(StringRef RegString,
5370 SelectionDAG *CurDAG,
5371 const SDLoc &DL,
5372 std::vector<SDValue> &Ops) {
5373 SmallVector<StringRef, 5> Fields;
5374 RegString.split(Fields, ':');
5375
5376 if (Fields.size() > 1) {
5377 bool AllIntFields = true;
5378
5379 for (StringRef Field : Fields) {
5380 // Need to trim out leading 'cp' characters and get the integer field.
5381 unsigned IntField;
5382 AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
5383 Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
5384 }
5385
5386 assert(AllIntFields &&
5387 "Unexpected non-integer value in special register string.");
5388 (void)AllIntFields;
5389 }
5390 }
5391
5392 // Maps a Banked Register string to its mask value. The mask value returned is
5393 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
5394 // mask operand, which expresses which register is to be used, e.g. r8, and in
5395 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string
5396 // was invalid.
getBankedRegisterMask(StringRef RegString)5397 static inline int getBankedRegisterMask(StringRef RegString) {
5398 auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
5399 if (!TheReg)
5400 return -1;
5401 return TheReg->Encoding;
5402 }
5403
5404 // The flags here are common to those allowed for apsr in the A class cores and
5405 // those allowed for the special registers in the M class cores. Returns a
5406 // value representing which flags were present, -1 if invalid.
getMClassFlagsMask(StringRef Flags)5407 static inline int getMClassFlagsMask(StringRef Flags) {
5408 return StringSwitch<int>(Flags)
5409 .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
5410 // correct when flags are not permitted
5411 .Case("g", 0x1)
5412 .Case("nzcvq", 0x2)
5413 .Case("nzcvqg", 0x3)
5414 .Default(-1);
5415 }
5416
5417 // Maps MClass special registers string to its value for use in the
5418 // t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
5419 // Returns -1 to signify that the string was invalid.
getMClassRegisterMask(StringRef Reg,const ARMSubtarget * Subtarget)5420 static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
5421 auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
5422 const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
5423 if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
5424 return -1;
5425 return (int)(TheReg->Encoding & 0xFFF); // SYSm value
5426 }
5427
getARClassRegisterMask(StringRef Reg,StringRef Flags)5428 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
5429 // The mask operand contains the special register (R Bit) in bit 4, whether
5430 // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
5431 // bits 3-0 contains the fields to be accessed in the special register, set by
5432 // the flags provided with the register.
5433 int Mask = 0;
5434 if (Reg == "apsr") {
5435 // The flags permitted for apsr are the same flags that are allowed in
5436 // M class registers. We get the flag value and then shift the flags into
5437 // the correct place to combine with the mask.
5438 Mask = getMClassFlagsMask(Flags);
5439 if (Mask == -1)
5440 return -1;
5441 return Mask << 2;
5442 }
5443
5444 if (Reg != "cpsr" && Reg != "spsr") {
5445 return -1;
5446 }
5447
5448 // This is the same as if the flags were "fc"
5449 if (Flags.empty() || Flags == "all")
5450 return Mask | 0x9;
5451
5452 // Inspect the supplied flags string and set the bits in the mask for
5453 // the relevant and valid flags allowed for cpsr and spsr.
5454 for (char Flag : Flags) {
5455 int FlagVal;
5456 switch (Flag) {
5457 case 'c':
5458 FlagVal = 0x1;
5459 break;
5460 case 'x':
5461 FlagVal = 0x2;
5462 break;
5463 case 's':
5464 FlagVal = 0x4;
5465 break;
5466 case 'f':
5467 FlagVal = 0x8;
5468 break;
5469 default:
5470 FlagVal = 0;
5471 }
5472
5473 // This avoids allowing strings where the same flag bit appears twice.
5474 if (!FlagVal || (Mask & FlagVal))
5475 return -1;
5476 Mask |= FlagVal;
5477 }
5478
5479 // If the register is spsr then we need to set the R bit.
5480 if (Reg == "spsr")
5481 Mask |= 0x10;
5482
5483 return Mask;
5484 }
5485
5486 // Lower the read_register intrinsic to ARM specific DAG nodes
5487 // using the supplied metadata string to select the instruction node to use
5488 // and the registers/masks to construct as operands for the node.
tryReadRegister(SDNode * N)5489 bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
5490 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
5491 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
5492 bool IsThumb2 = Subtarget->isThumb2();
5493 SDLoc DL(N);
5494
5495 std::vector<SDValue> Ops;
5496 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
5497
5498 if (!Ops.empty()) {
5499 // If the special register string was constructed of fields (as defined
5500 // in the ACLE) then need to lower to MRC node (32 bit) or
5501 // MRRC node(64 bit), we can make the distinction based on the number of
5502 // operands we have.
5503 unsigned Opcode;
5504 SmallVector<EVT, 3> ResTypes;
5505 if (Ops.size() == 5){
5506 Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
5507 ResTypes.append({ MVT::i32, MVT::Other });
5508 } else {
5509 assert(Ops.size() == 3 &&
5510 "Invalid number of fields in special register string.");
5511 Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
5512 ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
5513 }
5514
5515 Ops.push_back(getAL(CurDAG, DL));
5516 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
5517 Ops.push_back(N->getOperand(0));
5518 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
5519 return true;
5520 }
5521
5522 std::string SpecialReg = RegString->getString().lower();
5523
5524 int BankedReg = getBankedRegisterMask(SpecialReg);
5525 if (BankedReg != -1) {
5526 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
5527 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5528 N->getOperand(0) };
5529 ReplaceNode(
5530 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
5531 DL, MVT::i32, MVT::Other, Ops));
5532 return true;
5533 }
5534
5535 // The VFP registers are read by creating SelectionDAG nodes with opcodes
5536 // corresponding to the register that is being read from. So we switch on the
5537 // string to find which opcode we need to use.
5538 unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
5539 .Case("fpscr", ARM::VMRS)
5540 .Case("fpexc", ARM::VMRS_FPEXC)
5541 .Case("fpsid", ARM::VMRS_FPSID)
5542 .Case("mvfr0", ARM::VMRS_MVFR0)
5543 .Case("mvfr1", ARM::VMRS_MVFR1)
5544 .Case("mvfr2", ARM::VMRS_MVFR2)
5545 .Case("fpinst", ARM::VMRS_FPINST)
5546 .Case("fpinst2", ARM::VMRS_FPINST2)
5547 .Default(0);
5548
5549 // If an opcode was found then we can lower the read to a VFP instruction.
5550 if (Opcode) {
5551 if (!Subtarget->hasVFP2Base())
5552 return false;
5553 if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
5554 return false;
5555
5556 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5557 N->getOperand(0) };
5558 ReplaceNode(N,
5559 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
5560 return true;
5561 }
5562
5563 // If the target is M Class then need to validate that the register string
5564 // is an acceptable value, so check that a mask can be constructed from the
5565 // string.
5566 if (Subtarget->isMClass()) {
5567 int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
5568 if (SYSmValue == -1)
5569 return false;
5570
5571 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
5572 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5573 N->getOperand(0) };
5574 ReplaceNode(
5575 N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
5576 return true;
5577 }
5578
5579 // Here we know the target is not M Class so we need to check if it is one
5580 // of the remaining possible values which are apsr, cpsr or spsr.
5581 if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
5582 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5583 N->getOperand(0) };
5584 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
5585 DL, MVT::i32, MVT::Other, Ops));
5586 return true;
5587 }
5588
5589 if (SpecialReg == "spsr") {
5590 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5591 N->getOperand(0) };
5592 ReplaceNode(
5593 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
5594 MVT::i32, MVT::Other, Ops));
5595 return true;
5596 }
5597
5598 return false;
5599 }
5600
5601 // Lower the write_register intrinsic to ARM specific DAG nodes
5602 // using the supplied metadata string to select the instruction node to use
5603 // and the registers/masks to use in the nodes
tryWriteRegister(SDNode * N)5604 bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
5605 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
5606 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
5607 bool IsThumb2 = Subtarget->isThumb2();
5608 SDLoc DL(N);
5609
5610 std::vector<SDValue> Ops;
5611 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
5612
5613 if (!Ops.empty()) {
5614 // If the special register string was constructed of fields (as defined
5615 // in the ACLE) then need to lower to MCR node (32 bit) or
5616 // MCRR node(64 bit), we can make the distinction based on the number of
5617 // operands we have.
5618 unsigned Opcode;
5619 if (Ops.size() == 5) {
5620 Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
5621 Ops.insert(Ops.begin()+2, N->getOperand(2));
5622 } else {
5623 assert(Ops.size() == 3 &&
5624 "Invalid number of fields in special register string.");
5625 Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
5626 SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
5627 Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
5628 }
5629
5630 Ops.push_back(getAL(CurDAG, DL));
5631 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
5632 Ops.push_back(N->getOperand(0));
5633
5634 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
5635 return true;
5636 }
5637
5638 std::string SpecialReg = RegString->getString().lower();
5639 int BankedReg = getBankedRegisterMask(SpecialReg);
5640 if (BankedReg != -1) {
5641 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
5642 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5643 N->getOperand(0) };
5644 ReplaceNode(
5645 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
5646 DL, MVT::Other, Ops));
5647 return true;
5648 }
5649
5650 // The VFP registers are written to by creating SelectionDAG nodes with
5651 // opcodes corresponding to the register that is being written. So we switch
5652 // on the string to find which opcode we need to use.
5653 unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
5654 .Case("fpscr", ARM::VMSR)
5655 .Case("fpexc", ARM::VMSR_FPEXC)
5656 .Case("fpsid", ARM::VMSR_FPSID)
5657 .Case("fpinst", ARM::VMSR_FPINST)
5658 .Case("fpinst2", ARM::VMSR_FPINST2)
5659 .Default(0);
5660
5661 if (Opcode) {
5662 if (!Subtarget->hasVFP2Base())
5663 return false;
5664 Ops = { N->getOperand(2), getAL(CurDAG, DL),
5665 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
5666 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
5667 return true;
5668 }
5669
5670 std::pair<StringRef, StringRef> Fields;
5671 Fields = StringRef(SpecialReg).rsplit('_');
5672 std::string Reg = Fields.first.str();
5673 StringRef Flags = Fields.second;
5674
5675 // If the target was M Class then need to validate the special register value
5676 // and retrieve the mask for use in the instruction node.
5677 if (Subtarget->isMClass()) {
5678 int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
5679 if (SYSmValue == -1)
5680 return false;
5681
5682 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
5683 N->getOperand(2), getAL(CurDAG, DL),
5684 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
5685 ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
5686 return true;
5687 }
5688
5689 // We then check to see if a valid mask can be constructed for one of the
5690 // register string values permitted for the A and R class cores. These values
5691 // are apsr, spsr and cpsr; these are also valid on older cores.
5692 int Mask = getARClassRegisterMask(Reg, Flags);
5693 if (Mask != -1) {
5694 Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
5695 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5696 N->getOperand(0) };
5697 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
5698 DL, MVT::Other, Ops));
5699 return true;
5700 }
5701
5702 return false;
5703 }
5704
// Rewrite an inline-asm node so that register operands made of two GPRs are
// passed as a single GPRPair virtual register instead.
//
// The operands of N are copied one by one into a new operand list. Whenever a
// register def / early-clobber def / use operand group has exactly two
// registers and either carries a GPR register-class constraint or is tied to
// an earlier operand that was already rewritten, the two registers are
// replaced by one GPRPair register and copies are inserted to move the values
// between the pair and the original registers.
//
// Returns true if at least one operand was rewritten and N was replaced by the
// new node; returns false (leaving N alone) otherwise.
bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
  std::vector<SDValue> AsmNodeOperands;
  InlineAsm::Flag Flag;
  bool Changed = false;
  unsigned NumOps = N->getNumOperands();

  // Normally, i64 data is bound to two arbitrary GPRs for "%r" constraint.
  // However, some instructions (e.g. ldrexd/strexd in ARM mode) require
  // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
  // respectively. Since there is no constraint to explicitly specify a
  // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
  // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
  // them into a GPRPair.

  SDLoc dl(N);
  // If N has a glued node, its glue is the last operand; remember it so it can
  // be re-appended (or replaced) once the other operands have been processed.
  SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps - 1) : SDValue();

  // One entry per flag word seen so far that describes at least one register;
  // true if that operand group was rewritten to a GPRPair. Indexed by the
  // DefIdx reported for tied uses.
  SmallVector<bool, 8> OpChanged;
  // Glue node will be appended late.
  for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
    SDValue op = N->getOperand(i);
    AsmNodeOperands.push_back(op);

    // Operands before Op_FirstOperand are copied through unchanged.
    if (i < InlineAsm::Op_FirstOperand)
      continue;

    // From here on only constant operands are decoded as flag words; anything
    // else has already been copied above and is skipped.
    if (const auto *C = dyn_cast<ConstantSDNode>(N->getOperand(i)))
      Flag = InlineAsm::Flag(C->getZExtValue());
    else
      continue;

    // Immediate operands to inline asm in the SelectionDAG are modeled with
    // two operands. The first is a constant of value InlineAsm::Kind::Imm, and
    // the second is a constant with the value of the immediate. If we get here
    // and we have a Kind::Imm, skip the next operand, and continue.
    if (Flag.isImmKind()) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    const unsigned NumRegs = Flag.getNumOperandRegisters();
    if (NumRegs)
      OpChanged.push_back(false);

    unsigned DefIdx = 0;
    bool IsTiedToChangedOp = false;
    // If it's a use that is tied with a previous def, it has no
    // reg class constraint.
    if (Changed && Flag.isUseOperandTiedToDef(DefIdx))
      IsTiedToChangedOp = OpChanged[DefIdx];

    // Memory operands to inline asm in the SelectionDAG are modeled with two
    // operands: a constant of value InlineAsm::Kind::Mem followed by the input
    // operand. If we get here and we have a Kind::Mem, skip the next operand
    // (so it doesn't get misinterpreted), and continue. We do this here because
    // it's important to update the OpChanged array correctly before moving on.
    if (Flag.isMemKind()) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    // Only register uses, defs and early-clobber defs can be rewritten.
    if (!Flag.isRegUseKind() && !Flag.isRegDefKind() &&
        !Flag.isRegDefEarlyClobberKind())
      continue;

    // Rewrite only two-register groups that are either constrained to the GPR
    // register class or tied to an operand that was already rewritten.
    unsigned RC;
    const bool HasRC = Flag.hasRegClassConstraint(RC);
    if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
        || NumRegs != 2)
      continue;

    // The two register operands follow the flag word.
    assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
    SDValue V0 = N->getOperand(i+1);
    SDValue V1 = N->getOperand(i+2);
    Register Reg0 = cast<RegisterSDNode>(V0)->getReg();
    Register Reg1 = cast<RegisterSDNode>(V1)->getReg();
    SDValue PairedReg;
    MachineRegisterInfo &MRI = MF->getRegInfo();

    if (Flag.isRegDefKind() || Flag.isRegDefEarlyClobberKind()) {
      // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
      // the original GPRs.

      Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      SDValue Chain = SDValue(N,0);

      SDNode *GU = N->getGluedUser();
      SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
                                               Chain.getValue(1));

      // Extract values from a GPRPair reg and copy to the original GPR reg.
      SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
                                                    RegCopy);
      SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
                                                    RegCopy);
      SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
                                        RegCopy.getValue(1));
      SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));

      // Update the original glue user: keep all of its operands except the
      // last one, which is replaced by the glue result of the final copy.
      std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
      Ops.push_back(T1.getValue(1));
      CurDAG->UpdateNodeOperands(GU, Ops);
    } else {
      // For Kind == InlineAsm::Kind::RegUse, we first copy two GPRs into a
      // GPRPair and then pass the GPRPair to the inline asm.
      SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];

      // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
      SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
                                          Chain.getValue(1));
      SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
                                          T0.getValue(1));
      SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);

      // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
      // i32 VRs of inline asm with it.
      Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));

      // The new inline-asm node must consume the chain and glue produced by
      // the copy into the pair register.
      AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
      Glue = Chain.getValue(1);
    }

    Changed = true;

    if(PairedReg.getNode()) {
      // Mark the most recently recorded operand group as rewritten and build
      // a flag word of the same kind that describes a single register.
      OpChanged[OpChanged.size() -1 ] = true;
      Flag = InlineAsm::Flag(Flag.getKind(), 1 /* RegNum*/);
      if (IsTiedToChangedOp)
        Flag.setMatchingOp(DefIdx);
      else
        Flag.setRegClass(ARM::GPRPairRegClassID);
      // Replace the current flag.
      AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
          Flag, dl, MVT::i32);
      // Add the new register node and skip the original two GPRs.
      AsmNodeOperands.push_back(PairedReg);
      // Skip the next two GPRs.
      i += 2;
    }
  }

  // Glue (the original one, or the one produced for a rewritten use) always
  // goes last.
  if (Glue.getNode())
    AsmNodeOperands.push_back(Glue);
  if (!Changed)
    return false;

  // Build the replacement node with the same opcode and the new operand list,
  // reset its node id to -1, and swap it in for N.
  SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
      CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
  New->setNodeId(-1);
  ReplaceNode(N, New.getNode());
  return true;
}
5863
SelectInlineAsmMemoryOperand(const SDValue & Op,InlineAsm::ConstraintCode ConstraintID,std::vector<SDValue> & OutOps)5864 bool ARMDAGToDAGISel::SelectInlineAsmMemoryOperand(
5865 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
5866 std::vector<SDValue> &OutOps) {
5867 switch(ConstraintID) {
5868 default:
5869 llvm_unreachable("Unexpected asm memory constraint");
5870 case InlineAsm::ConstraintCode::m:
5871 case InlineAsm::ConstraintCode::o:
5872 case InlineAsm::ConstraintCode::Q:
5873 case InlineAsm::ConstraintCode::Um:
5874 case InlineAsm::ConstraintCode::Un:
5875 case InlineAsm::ConstraintCode::Uq:
5876 case InlineAsm::ConstraintCode::Us:
5877 case InlineAsm::ConstraintCode::Ut:
5878 case InlineAsm::ConstraintCode::Uv:
5879 case InlineAsm::ConstraintCode::Uy:
5880 // Require the address to be in a register. That is safe for all ARM
5881 // variants and it is hard to do anything much smarter without knowing
5882 // how the operand is used.
5883 OutOps.push_back(Op);
5884 return false;
5885 }
5886 return true;
5887 }
5888
5889 /// createARMISelDag - This pass converts a legalized DAG into a
5890 /// ARM-specific DAG, ready for instruction scheduling.
5891 ///
createARMISelDag(ARMBaseTargetMachine & TM,CodeGenOptLevel OptLevel)5892 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
5893 CodeGenOptLevel OptLevel) {
5894 return new ARMDAGToDAGISel(TM, OptLevel);
5895 }
5896