//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the ARM target.
//
//===----------------------------------------------------------------------===//
12
13 #include "ARM.h"
14 #include "ARMBaseInstrInfo.h"
15 #include "ARMTargetMachine.h"
16 #include "MCTargetDesc/ARMAddressingModes.h"
17 #include "Utils/ARMBaseInfo.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/CodeGen/MachineFrameInfo.h"
20 #include "llvm/CodeGen/MachineFunction.h"
21 #include "llvm/CodeGen/MachineInstrBuilder.h"
22 #include "llvm/CodeGen/MachineRegisterInfo.h"
23 #include "llvm/CodeGen/SelectionDAG.h"
24 #include "llvm/CodeGen/SelectionDAGISel.h"
25 #include "llvm/CodeGen/TargetLowering.h"
26 #include "llvm/IR/CallingConv.h"
27 #include "llvm/IR/Constants.h"
28 #include "llvm/IR/DerivedTypes.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/Intrinsics.h"
31 #include "llvm/IR/IntrinsicsARM.h"
32 #include "llvm/IR/LLVMContext.h"
33 #include "llvm/Support/CommandLine.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/ErrorHandling.h"
36 #include "llvm/Target/TargetOptions.h"
37
38 using namespace llvm;
39
40 #define DEBUG_TYPE "arm-isel"
41
42 static cl::opt<bool>
43 DisableShifterOp("disable-shifter-op", cl::Hidden,
44 cl::desc("Disable isel of shifter-op"),
45 cl::init(false));
46
//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
///
namespace {

class ARMDAGToDAGISel : public SelectionDAGISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;

public:
  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Reset the subtarget each time through.
    Subtarget = &MF.getSubtarget<ARMSubtarget>();
    SelectionDAGISel::runOnMachineFunction(MF);
    return true;
  }

  StringRef getPassName() const override { return "ARM Instruction Selection"; }

  void PreprocessISelDAG() override;

  /// getI32Imm - Return a target constant of type i32 with the specified
  /// value.
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  void Select(SDNode *N) override;

  bool hasNoVMLxHazardUse(SDNode *N) const;
  bool isShifterOpProfitable(const SDValue &Shift,
                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  bool SelectRegShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C,
                               bool CheckProfitability = true);
  bool SelectImmShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, bool CheckProfitability = true);
  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B, SDValue &C) {
    // Don't apply the profitability check
    return SelectRegShifterOperand(N, A, B, C, false);
  }
  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B) {
    // Don't apply the profitability check
    return SelectImmShifterOperand(N, A, B, false);
  }

  bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);

  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

  // Materialize a CMOV predicate: the constant condition code plus the CPSR
  // flags register operand.
  bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
    const ConstantSDNode *CN = cast<ConstantSDNode>(N);
    Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
    Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
    return true;
  }

  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                   SDValue &Offset, SDValue &Opc);
  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
  bool SelectAddrMode3(SDValue N, SDValue &Base,
                       SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
  bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);

  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);

  // Thumb Addressing Modes:
  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                SDValue &OffImm);
  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);

  // Thumb 2 Addressing Modes:
  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                            SDValue &OffImm);
  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                  SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
                                  unsigned Shift);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                             SDValue &OffReg, SDValue &ShImm);
  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);

  template<int Min, int Max>
  bool SelectImmediateInRange(SDValue N, SDValue &OffImm);

  // Predicates used by the autogenerated matcher: test whether an immediate
  // (or its bitwise complement) is encodable as an ARM / Thumb-2 modified
  // immediate ("so_imm").
  inline bool is_so_imm(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Imm) != -1;
  }

  inline bool is_so_imm_not(unsigned Imm) const {
    return ARM_AM::getSOImmVal(~Imm) != -1;
  }

  inline bool is_t2_so_imm(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  }

  inline bool is_t2_so_imm_not(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(~Imm) != -1;
  }

  // Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"

private:
  void transferMemOperands(SDNode *Src, SDNode *Dst);

  /// Indexed (pre/post inc/dec) load matching code for ARM.
  bool tryARMIndexedLoad(SDNode *N);
  bool tryT1IndexedLoad(SDNode *N);
  bool tryT2IndexedLoad(SDNode *N);
  bool tryMVEIndexedLoad(SDNode *N);

  /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
  /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// loads of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVST - Select NEON store intrinsics.  NumVecs should
  /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// stores of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
  /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// load/store of D registers and Q registers.
  void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                       unsigned NumVecs, const uint16_t *DOpcodes,
                       const uint16_t *QOpcodes);

  /// Helper functions for setting up clusters of MVE predication operands.
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask);
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask, SDValue Inactive);

  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc);
  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy);

  /// SelectMVE_WB - Select MVE writeback load/store intrinsics.
  void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated);

  /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics.
  void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate,
                           bool HasSaturationOperand);

  /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics.
  void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
                         uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);

  /// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between
  /// vector lanes.
  void SelectMVE_VSHLC(SDNode *N, bool Predicated);

  /// Select long MVE vector reductions with two vector operands
  /// Stride is the number of vector element widths the instruction can operate
  /// on:
  /// 2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]
  /// 1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32]
  /// Stride is used when addressing the OpcodesS array which contains multiple
  /// opcodes for each element width.
  /// TySize is the index into the list of element types listed above
  void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
                             const uint16_t *OpcodesS, const uint16_t *OpcodesU,
                             size_t Stride, size_t TySize);

  /// Select a 64-bit MVE vector reduction with two vector operands
  /// arm_mve_vmlldava_[predicated]
  void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                         const uint16_t *OpcodesU);
  /// Select a 72-bit MVE vector rounding reduction with two vector operands
  /// int_arm_mve_vrmlldavha[_predicated]
  void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                           const uint16_t *OpcodesU);

  /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs
  /// should be 2 or 4. The opcode array specifies the instructions
  /// used for 8, 16 and 32-bit lane sizes respectively, and each
  /// pointer points to a set of NumVecs sub-opcodes used for the
  /// different stages (e.g. VLD20 versus VLD21) of each load family.
  void SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                     const uint16_t *const *Opcodes, bool HasWriteback);

  /// SelectMVE_VxDUP - Select MVE incrementing-dup instructions. Opcodes is an
  /// array of 3 elements for the 8, 16 and 32-bit lane sizes.
  void SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
                       bool Wrapping, bool Predicated);

  /// Select SelectCDE_CXxD - Select CDE dual-GPR instruction (one of CX1D,
  /// CX1DA, CX2D, CX2DA, CX3, CX3DA).
  /// \arg \c NumExtraOps number of extra operands besides the coprocossor,
  ///                     the accumulator and the immediate operand, i.e. 0
  ///                     for CX1*, 1 for CX2*, 2 for CX3*
  /// \arg \c HasAccum whether the instruction has an accumulator operand
  void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps,
                      bool HasAccum);

  /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
  /// should be 1, 2, 3 or 4.  The opcode array specifies the instructions used
  /// for loading D registers.
  void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
                    unsigned NumVecs, const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0 = nullptr,
                    const uint16_t *QOpcodes1 = nullptr);

  /// Try to select SBFX/UBFX instructions for ARM.
  bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);

  // Select special operations if node forms integer ABS pattern
  bool tryABSOp(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool tryInlineAsm(SDNode *N);

  void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);

  void SelectCMP_SWAP(SDNode *N);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  // Form pairs of consecutive R, S, D, or Q registers.
  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);

  // Form sequences of 4 consecutive S, D, or Q registers.
  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);

  // Get the alignment operand for a NEON VLD or VST instruction.
  SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
                        bool is64BitVector);

  /// Checks if N is a multiplication by a constant where we can extract out a
  /// power of two from the constant so that it can be used in a shift, but only
  /// if it simplifies the materialization of the constant. Returns true if it
  /// is, and assigns to PowerOfTwo the power of two that should be extracted
  /// out and to NewMulConst the new constant to be multiplied by.
  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;

  /// Replace N with M in CurDAG, in a way that also ensures that M gets
  /// selected when N would have been selected.
  void replaceDAGValue(const SDValue &N, SDValue M);
};

} // end anonymous namespace
345
346 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
347 /// operand. If so Imm will receive the 32-bit value.
isInt32Immediate(SDNode * N,unsigned & Imm)348 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
349 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
350 Imm = cast<ConstantSDNode>(N)->getZExtValue();
351 return true;
352 }
353 return false;
354 }
355
// isInt32Immediate - This method tests to see if the value is a 32-bit
// constant operand. If so Imm will receive the 32 bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}
361
362 // isOpcWithIntImmediate - This method tests to see if the node is a specific
363 // opcode and that it has a immediate integer right operand.
364 // If so Imm will receive the 32 bit value.
isOpcWithIntImmediate(SDNode * N,unsigned Opc,unsigned & Imm)365 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
366 return N->getOpcode() == Opc &&
367 isInt32Immediate(N->getOperand(1).getNode(), Imm);
368 }
369
370 /// Check whether a particular node is a constant value representable as
371 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
372 ///
373 /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
isScaledConstantInRange(SDValue Node,int Scale,int RangeMin,int RangeMax,int & ScaledConstant)374 static bool isScaledConstantInRange(SDValue Node, int Scale,
375 int RangeMin, int RangeMax,
376 int &ScaledConstant) {
377 assert(Scale > 0 && "Invalid scale!");
378
379 // Check that this is a constant.
380 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
381 if (!C)
382 return false;
383
384 ScaledConstant = (int) C->getZExtValue();
385 if ((ScaledConstant % Scale) != 0)
386 return false;
387
388 ScaledConstant /= Scale;
389 return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
390 }
391
/// Pre-selection DAG rewrite: reshape (add X, (and (srl Y, c1), c2)) so the
/// shift folds into the add and the and/srl pair becomes a UBFX. Only runs
/// on v6T2+ targets, which have the bitfield-extract instructions.
void ARMDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->hasV6T2Ops())
    return;

  bool isThumb2 = Subtarget->isThumb();
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.

    if (N->getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    // of trailing zeros of c2. The left shift would be folded as an shifter
    // operand of 'add' and the 'and' and 'srl' would become a bits extraction
    // node (UBFX).

    // Canonicalize so the AND-with-immediate (if any) is in N1.
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    unsigned And_imm = 0;
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = countTrailingZeros(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free. e.g. On
      // Swift, left shifter operand of 1 / 2 for free but others are not.
      // e.g.
      //  ubfx   r3, r1, #16, #8
      //  ldr.w  r3, [r0, r3, lsl #2]
      // vs.
      //  mov.w  r9, #1020
      //  and.w  r2, r9, r1, lsr #14
      //  ldr    r2, [r0, r2]
      continue;
    And_imm >>= TZ;
    // After shifting out the trailing zeros the mask must be all-ones
    // (i.e. a contiguous run of set bits).
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure first operand is not a shifter operand which would prevent
    // folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation:
    //   srl' = srl X, (c1 + tz); and' = and srl', (c2 >> tz);
    //   N1'  = shl and', tz  -- then splice N1' back into the add.
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
                                              MVT::i32));
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         Srl,
                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    CurDAG->UpdateNodeOperands(N, N0, N1);
  }
}
474
/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
  // At -O0 we do not care about hazards.
  if (OptLevel == CodeGenOpt::None)
    return true;

  if (!Subtarget->hasVMLxHazards())
    return true;

  // Only worth reasoning about when the result has exactly one consumer.
  if (!N->hasOneUse())
    return false;

  SDNode *Use = *N->use_begin();
  if (Use->getOpcode() == ISD::CopyToReg)
    return true;
  if (Use->isMachineOpcode()) {
    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
        CurDAG->getSubtarget().getInstrInfo());

    const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
    if (MCID.mayStore())
      return true;
    unsigned Opcode = MCID.getOpcode();
    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
      return true;
    // vmlx feeding into another vmlx. We actually want to unfold
    // the use later in the MLxExpansion pass. e.g.
    // vmla
    // vmla (stall 8 cycles)
    //
    // vmul (5 cycles)
    // vadd (5 cycles)
    // vmla
    // This adds up to about 18 - 19 cycles.
    //
    // vmla
    // vmul (stall 4 cycles)
    // vadd adds up to about 14 cycles.
    return TII->isFpMLxInstruction(Opcode);
  }

  return false;
}
519
isShifterOpProfitable(const SDValue & Shift,ARM_AM::ShiftOpc ShOpcVal,unsigned ShAmt)520 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
521 ARM_AM::ShiftOpc ShOpcVal,
522 unsigned ShAmt) {
523 if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
524 return true;
525 if (Shift.hasOneUse())
526 return true;
527 // R << 2 is free.
528 return ShOpcVal == ARM_AM::lsl &&
529 (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
530 }
531
canExtractShiftFromMul(const SDValue & N,unsigned MaxShift,unsigned & PowerOfTwo,SDValue & NewMulConst) const532 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
533 unsigned MaxShift,
534 unsigned &PowerOfTwo,
535 SDValue &NewMulConst) const {
536 assert(N.getOpcode() == ISD::MUL);
537 assert(MaxShift > 0);
538
539 // If the multiply is used in more than one place then changing the constant
540 // will make other uses incorrect, so don't.
541 if (!N.hasOneUse()) return false;
542 // Check if the multiply is by a constant
543 ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
544 if (!MulConst) return false;
545 // If the constant is used in more than one place then modifying it will mean
546 // we need to materialize two constants instead of one, which is a bad idea.
547 if (!MulConst->hasOneUse()) return false;
548 unsigned MulConstVal = MulConst->getZExtValue();
549 if (MulConstVal == 0) return false;
550
551 // Find the largest power of 2 that MulConstVal is a multiple of
552 PowerOfTwo = MaxShift;
553 while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
554 --PowerOfTwo;
555 if (PowerOfTwo == 0) return false;
556 }
557
558 // Only optimise if the new cost is better
559 unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
560 NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
561 unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget);
562 unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget);
563 return NewCost < OldCost;
564 }
565
/// Replace N with M in CurDAG, in a way that also ensures that M gets
/// selected when N would have been selected.
void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
  // Move M to N's position in the DAG's node list first, so M is processed
  // where N would have been, then redirect all uses of N to M.
  CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
  ReplaceUses(N, M);
}
570
/// Match N as a register shifted by an immediate (so_reg_imm). On success
/// BaseReg receives the value being shifted and Opc the encoded
/// shift-opcode/amount pair.
/// NOTE: CheckProfitability is unused here; profitability only matters for
/// register-controlled shifts (see SelectRegShifterOperand).
bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  // If N is a multiply-by-constant and it's profitable to extract a shift and
  // use it in a shifted operand do so.
  if (N.getOpcode() == ISD::MUL) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
      // The Handle keeps track of N across the constant replacement below,
      // which may RAUW nodes.
      HandleSDNode Handle(N);
      SDLoc Loc(N);
      replaceDAGValue(N.getOperand(1), NewMulConst);
      BaseReg = Handle.getValue();
      Opc = CurDAG->getTargetConstant(
          ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
      return true;
    }
  }

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  // The shift amount must be an immediate; register-controlled shifts are
  // handled by SelectRegShifterOperand.
  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS) return false;
  ShImmVal = RHS->getZExtValue() & 31;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
609
/// Match N as a register shifted by a register (so_reg_reg). On success
/// BaseReg receives the value being shifted, ShReg the register holding the
/// shift amount, and Opc the encoded shift opcode.
bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &ShReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  // A constant shift amount belongs to SelectImmShifterOperand; this matcher
  // only accepts a shift amount held in a register.
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (RHS) return false;

  ShReg = N.getOperand(1);
  if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
    return false;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
636
// Determine whether an ISD::OR's operands are suitable to turn the operation
// into an addition, which often has more compact encodings.
bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
  assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
  Out = N;
  // OR is equivalent to ADD exactly when the operands share no set bits.
  return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
}
644
645
/// Match an address of the form base register plus a signed 12-bit immediate
/// offset. Always succeeds, falling back to "base only" with a zero offset.
bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
                                          SDValue &Base,
                                          SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    // Strip an ARMISD::Wrapper, except around target global/TLS addresses
    // and external symbols.
    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else
      Base = N;
    OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    // A subtract is encoded as a negative offset.
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
      Base   = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
696
697
698
/// Match an addressing-mode-2 scaled-register address for loads/stores:
/// base register +/- (offset register, optionally shifted). On success Base
/// and Offset receive the two registers and Opc the encoded add/sub,
/// shift-amount and shift-opcode operand.
bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  // Handle multiply-by-(power-of-two +/- 1) as base + shifted base,
  // but only when the extra use of the base register is acceptable.
  if (N.getOpcode() == ISD::MUL &&
      ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = - RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ISD::ADD.
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave simple R +/- imm12 operands for LDRi12
  if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) // 12 bits.
      return false;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
  ARM_AM::ShiftOpc ShOpcVal =
    ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base   = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh =
           dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
  // (Only for ADD/OR: a shifted LHS cannot be negated for SUB.)
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
          dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          // Swap roles: the shifted LHS becomes the offset.
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  // If Offset is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
      // Handle keeps Offset alive across the RAUW in replaceDAGValue.
      HandleSDNode Handle(Offset);
      replaceDAGValue(Offset.getOperand(1), NewMulConst);
      Offset = Handle.getValue();
      ShAmt = PowerOfTwo;
      ShOpcVal = ARM_AM::lsl;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
806
// Select a register-offset (optionally shifted) operand for a pre/post-
// indexed addrmode2 load/store. Deliberately fails for plain constants in
// [0, 0x1000) so the immediate forms get first pick.
bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  // Add vs. sub is dictated by the memory node's indexed addressing mode.
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
    return false;

  Offset = N;
  // If N is itself a shift, try to fold the shift into the operand.
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
  unsigned ShAmt = 0;
  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
        Offset = N.getOperand(0);
      else {
        // Not profitable: keep N whole and drop the shift encoding.
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
842
SelectAddrMode2OffsetImmPre(SDNode * Op,SDValue N,SDValue & Offset,SDValue & Opc)843 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
844 SDValue &Offset, SDValue &Opc) {
845 unsigned Opcode = Op->getOpcode();
846 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
847 ? cast<LoadSDNode>(Op)->getAddressingMode()
848 : cast<StoreSDNode>(Op)->getAddressingMode();
849 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
850 ? ARM_AM::add : ARM_AM::sub;
851 int Val;
852 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
853 if (AddSub == ARM_AM::sub) Val *= -1;
854 Offset = CurDAG->getRegister(0, MVT::i32);
855 Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
856 return true;
857 }
858
859 return false;
860 }
861
862
SelectAddrMode2OffsetImm(SDNode * Op,SDValue N,SDValue & Offset,SDValue & Opc)863 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
864 SDValue &Offset, SDValue &Opc) {
865 unsigned Opcode = Op->getOpcode();
866 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
867 ? cast<LoadSDNode>(Op)->getAddressingMode()
868 : cast<StoreSDNode>(Op)->getAddressingMode();
869 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
870 ? ARM_AM::add : ARM_AM::sub;
871 int Val;
872 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
873 Offset = CurDAG->getRegister(0, MVT::i32);
874 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
875 ARM_AM::no_shift),
876 SDLoc(Op), MVT::i32);
877 return true;
878 }
879
880 return false;
881 }
882
SelectAddrOffsetNone(SDValue N,SDValue & Base)883 bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
884 Base = N;
885 return true;
886 }
887
// Select an addrmode3 operand: base register plus either a register
// offset or a +/- imm8 constant, encoded via ARM_AM::getAM3Opc.
bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::SUB) {
    // X - C is canonicalize to X + -C, no need to handle it here.
    Base = N.getOperand(0);
    Offset = N.getOperand(1);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    // Not base+constant: use the whole node as the base with zero offset.
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      // Rewrite to a target frame index so later passes can resolve it.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                              -256 + 1, 256, RHSC)) { // 8 bits.
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);

    // Negative constants are encoded as a subtraction of the magnitude.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // Constant too large for imm8: fall back to a register offset.
  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                  MVT::i32);
  return true;
}
941
// Select the pre/post-indexed offset operand for an addrmode3 load/store.
// Always matches: either as an immediate in [0, 256) or as a register.
bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  // Add vs. sub is dictated by the memory node's indexed addressing mode.
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
    // Constant offset: encode it and use "reg 0" as the offset register.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
                                    MVT::i32);
    return true;
  }

  // Otherwise use N itself as a register offset with a zero immediate.
  Offset = N;
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
                                  MVT::i32);
  return true;
}
963
// Shared worker for addrmode5 selection (base + scaled immediate offset).
// When FP16 is set the immediate is scaled by 2 (instead of 4) and the
// FP16 opcode encoding (getAM5FP16Opc) is used.
bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
                                        bool FP16) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      // Rewrite to a target frame index so later passes can resolve it.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      // Strip the wrapper, except around target addresses that must keep it.
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  const int Scale = FP16 ? 2 : 4;

  if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }

    // Negative offsets are encoded as a subtraction of the magnitude.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }

    if (FP16)
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);
    else
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);

    return true;
  }

  // Constant offset out of range: base only, zero offset.
  Base = N;

  if (FP16)
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
  else
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);

  return true;
}
1022
SelectAddrMode5(SDValue N,SDValue & Base,SDValue & Offset)1023 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
1024 SDValue &Base, SDValue &Offset) {
1025 return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
1026 }
1027
SelectAddrMode5FP16(SDValue N,SDValue & Base,SDValue & Offset)1028 bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
1029 SDValue &Base, SDValue &Offset) {
1030 return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
1031 }
1032
// Select an addrmode6 operand: the address itself plus an alignment
// operand derived from the parent memory node's memory operand.
bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  // 0 means "no alignment guarantee"; refined below when possible.
  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Parent);

  if (isa<LSBaseSDNode>(MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
       MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    unsigned MMOAlign = MemN->getAlignment();
    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
    if (MMOAlign >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics. For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = MemN->getAlignment();
  }

  Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
  return true;
}
1061
SelectAddrMode6Offset(SDNode * Op,SDValue N,SDValue & Offset)1062 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1063 SDValue &Offset) {
1064 LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1065 ISD::MemIndexedMode AM = LdSt->getAddressingMode();
1066 if (AM != ISD::POST_INC)
1067 return false;
1068 Offset = N;
1069 if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1070 if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1071 Offset = CurDAG->getRegister(0, MVT::i32);
1072 }
1073 return true;
1074 }
1075
SelectAddrModePC(SDValue N,SDValue & Offset,SDValue & Label)1076 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1077 SDValue &Offset, SDValue &Label) {
1078 if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1079 Offset = N.getOperand(0);
1080 SDValue N1 = N.getOperand(1);
1081 Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
1082 SDLoc(N), MVT::i32);
1083 return true;
1084 }
1085
1086 return false;
1087 }
1088
1089
1090 //===----------------------------------------------------------------------===//
1091 // Thumb Addressing Modes
1092 //===----------------------------------------------------------------------===//
1093
shouldUseZeroOffsetLdSt(SDValue N)1094 static bool shouldUseZeroOffsetLdSt(SDValue N) {
1095 // Negative numbers are difficult to materialise in thumb1. If we are
1096 // selecting the add of a negative, instead try to select ri with a zero
1097 // offset, so create the add node directly which will become a sub.
1098 if (N.getOpcode() != ISD::ADD)
1099 return false;
1100
1101 // Look for an imm which is not legal for ld/st, but is legal for sub.
1102 if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
1103 return C->getSExtValue() < 0 && C->getSExtValue() >= -255;
1104
1105 return false;
1106 }
1107
SelectThumbAddrModeRRSext(SDValue N,SDValue & Base,SDValue & Offset)1108 bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
1109 SDValue &Offset) {
1110 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1111 ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
1112 if (!NC || !NC->isNullValue())
1113 return false;
1114
1115 Base = Offset = N;
1116 return true;
1117 }
1118
1119 Base = N.getOperand(0);
1120 Offset = N.getOperand(1);
1121 return true;
1122 }
1123
SelectThumbAddrModeRR(SDValue N,SDValue & Base,SDValue & Offset)1124 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
1125 SDValue &Offset) {
1126 if (shouldUseZeroOffsetLdSt(N))
1127 return false; // Select ri instead
1128 return SelectThumbAddrModeRRSext(N, Base, Offset);
1129 }
1130
// Select a Thumb base + unsigned imm5*Scale addressing mode. Returns
// false when a register-offset form should be used instead.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                          SDValue &Base, SDValue &OffImm) {
  // Adds of small negative constants are better selected as ri with a
  // zero offset (see shouldUseZeroOffsetLdSt).
  if (shouldUseZeroOffsetLdSt(N)) {
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::ADD) {
      return false; // We want to select register offset instead
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      // Strip the wrapper, except around target addresses that must keep it.
      Base = N.getOperand(0);
    } else {
      Base = N;
    }

    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is + imm5 * scale, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
    Base = N.getOperand(0);
    OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  // Offset is too large, so use register offset instead.
  return false;
}
1168
1169 bool
SelectThumbAddrModeImm5S4(SDValue N,SDValue & Base,SDValue & OffImm)1170 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
1171 SDValue &OffImm) {
1172 return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
1173 }
1174
1175 bool
SelectThumbAddrModeImm5S2(SDValue N,SDValue & Base,SDValue & OffImm)1176 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
1177 SDValue &OffImm) {
1178 return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
1179 }
1180
1181 bool
SelectThumbAddrModeImm5S1(SDValue N,SDValue & Base,SDValue & OffImm)1182 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
1183 SDValue &OffImm) {
1184 return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
1185 }
1186
// Select a Thumb SP-relative (frame-index based) address: a frame index
// plus an optional unsigned imm8*4 offset. NOTE: may raise the frame
// object's alignment to 4 as a side effect, since the encoding only
// supports multiples of 4.
bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    if (MFI.getObjectAlign(FI) < Align(4))
      MFI.setObjectAlignment(FI, Align(4));
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
      Base = N.getOperand(0);
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      // Make sure the offset is inside the object, or we might fail to
      // allocate an emergency spill slot. (An out-of-range access is UB, but
      // it could show up anyway.)
      MachineFrameInfo &MFI = MF->getFrameInfo();
      if (RHSC * 4 < MFI.getObjectSize(FI)) {
        // For LHS+RHS to result in an offset that's a multiple of 4 the object
        // indexed by the LHS must be 4-byte aligned.
        if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlign(FI) < Align(4))
          MFI.setObjectAlignment(FI, Align(4));
        if (MFI.getObjectAlign(FI) >= Align(4)) {
          Base = CurDAG->getTargetFrameIndex(
              FI, TLI->getPointerTy(CurDAG->getDataLayout()));
          OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  return false;
}
1232
// Select a Thumb base +/- imm7 << Shift addressing mode; Shift is the
// scale of the encoded immediate. Always matches (falls back to base-only
// with a zero offset).
template <unsigned Shift>
bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base,
                                          SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
                                RHSC)) {
      Base = N.getOperand(0);
      // For a SUB the matched constant is subtracted, so negate it.
      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm =
          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
1254
1255
1256 //===----------------------------------------------------------------------===//
1257 // Thumb 2 Addressing Modes
1258 //===----------------------------------------------------------------------===//
1259
1260
// Select a Thumb2 base + unsigned imm12 addressing mode. Defers to the
// imm8 (negative-offset) form and to t2LDRpci where those fit better.
bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      // Strip the wrapper, except around target addresses that must keep it.
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::TargetConstantPool)
        return false; // We want to select t2LDRpci instead.
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    if (SelectT2AddrModeImm8(N, Base, OffImm))
      // Let t2LDRi8 handle (R - imm8).
      return false;

    int RHSC = (int)RHS->getZExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
1316
// Select a Thumb2 base +/- imm8 << Shift addressing mode; Shift scales
// the encoded immediate. Always matches (falls back to base-only).
template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        // Rewrite to a target frame index so later passes can resolve it.
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }

      // For a SUB the matched constant is subtracted, so negate it.
      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm =
          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
1343
// Select a Thumb2 base - imm8 addressing mode: matches only when the
// effective constant offset is in [-255, 0). Positive offsets are left
// for the imm12 form.
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
                                           SDValue &Base, SDValue &OffImm) {
  // Match simple R - imm8 operands.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    // A SUB's constant is subtracted, so negate to get the effective offset.
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}
1370
SelectT2AddrModeImm8Offset(SDNode * Op,SDValue N,SDValue & OffImm)1371 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1372 SDValue &OffImm){
1373 unsigned Opcode = Op->getOpcode();
1374 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1375 ? cast<LoadSDNode>(Op)->getAddressingMode()
1376 : cast<StoreSDNode>(Op)->getAddressingMode();
1377 int RHSC;
1378 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1379 OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1380 ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
1381 : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1382 return true;
1383 }
1384
1385 return false;
1386 }
1387
// Select a Thumb2 base +/- imm7 << Shift addressing mode; Shift scales
// the encoded immediate. Always matches (falls back to base-only).
template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
                                RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        // Rewrite to a target frame index so later passes can resolve it.
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }

      // For a SUB the matched constant is subtracted, so negate it.
      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm =
          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
1415
1416 template <unsigned Shift>
SelectT2AddrModeImm7Offset(SDNode * Op,SDValue N,SDValue & OffImm)1417 bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
1418 SDValue &OffImm) {
1419 return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
1420 }
1421
// Select the pre/post-indexed imm7 << Shift offset for a (possibly
// masked) Thumb2 load/store; succeeds only for scaled constants in
// [0, 0x80).
bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm,
                                                 unsigned Shift) {
  unsigned Opcode = Op->getOpcode();
  // Pull the addressing mode from whichever memory node kind Op is.
  ISD::MemIndexedMode AM;
  switch (Opcode) {
  case ISD::LOAD:
    AM = cast<LoadSDNode>(Op)->getAddressingMode();
    break;
  case ISD::STORE:
    AM = cast<StoreSDNode>(Op)->getAddressingMode();
    break;
  case ISD::MLOAD:
    AM = cast<MaskedLoadSDNode>(Op)->getAddressingMode();
    break;
  case ISD::MSTORE:
    AM = cast<MaskedStoreSDNode>(Op)->getAddressingMode();
    break;
  default:
    llvm_unreachable("Unexpected Opcode for Imm7Offset");
  }

  int RHSC;
  // 7 bit constant, shifted by Shift.
  if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) {
    // Incrementing modes keep the scaled value; decrementing ones negate it.
    OffImm =
        ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
            ? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32)
            : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N),
                                        MVT::i32);
    return true;
  }
  return false;
}
1456
1457 template <int Min, int Max>
SelectImmediateInRange(SDValue N,SDValue & OffImm)1458 bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) {
1459 int Val;
1460 if (isScaledConstantInRange(N, 1, Min, Max, Val)) {
1461 OffImm = CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i32);
1462 return true;
1463 }
1464 return false;
1465 }
1466
// Select a Thumb2 register + shifted-register addressing mode:
// (R + R) or (R + (R << [1,2,3])). Constant offsets are rejected so the
// imm12/imm8 forms handle them.
bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                            SDValue &Base,
                                            SDValue &OffReg, SDValue &ShImm) {
  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
      return false;
    else if (RHSC < 0 && RHSC >= -255) // 8 bits
      return false;
  }

  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base = N.getOperand(0);
  OffReg = N.getOperand(1);

  // Swap if it is ((R << c) + R).
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
  if (ShOpcVal != ARM_AM::lsl) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
    if (ShOpcVal == ARM_AM::lsl)
      std::swap(Base, OffReg);
  }

  if (ShOpcVal == ARM_AM::lsl) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      // Only shifts of 1..3 are encodable; otherwise keep OffReg whole.
      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
        OffReg = OffReg.getOperand(0);
      else {
        ShAmt = 0;
      }
    }
  }

  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
      // replaceDAGValue mutates the DAG; the HandleSDNode keeps a stable
      // reference to OffReg across that update.
      HandleSDNode Handle(OffReg);
      replaceDAGValue(OffReg.getOperand(1), NewMulConst);
      OffReg = Handle.getValue();
      ShAmt = PowerOfTwo;
    }
  }

  ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);

  return true;
}
1526
// Select the address for Thumb2 exclusive loads/stores. Always succeeds:
// defaults to base-only with a zero offset, and folds a constant offset
// only when it is a multiple of 4 in [0, 1020].
bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
                                                SDValue &OffImm) {
  // This *must* succeed since it's used for the irreplaceable ldrex and strex
  // instructions.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);

  if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
    return true;

  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS)
    return true;

  uint32_t RHSC = (int)RHS->getZExtValue();
  if (RHSC > 1020 || RHSC % 4 != 0)
    return true;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    // Rewrite to a target frame index so later passes can resolve it.
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
  }

  // The instruction encodes the offset scaled down by 4.
  OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
  return true;
}
1555
1556 //===--------------------------------------------------------------------===//
1557
1558 /// getAL - Returns a ARMCC::AL immediate node.
getAL(SelectionDAG * CurDAG,const SDLoc & dl)1559 static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
1560 return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
1561 }
1562
transferMemOperands(SDNode * N,SDNode * Result)1563 void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
1564 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1565 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
1566 }
1567
// Try to select an indexed (pre/post-increment) ARM-mode load for N.
// Returns true and replaces N on success; false lets normal selection
// continue.
bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  SDValue Offset, AMOpc;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  // Pick the opcode by loaded type and by which addressing-mode selector
  // accepts the offset: immediate forms first, then register forms.
  if (LoadedVT == MVT::i32 && isPre &&
      SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_PRE_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 && !isPre &&
      SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_POST_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 &&
      SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
    Match = true;

  } else if (LoadedVT == MVT::i16 &&
             SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
    // i16 uses addrmode3; pick signed or unsigned extension variant.
    Match = true;
    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
                 ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
                 : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
    if (LD->getExtensionType() == ISD::SEXTLOAD) {
      // Sign-extending byte loads only exist in addrmode3.
      if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
      }
    } else {
      // Zero-extending byte loads use addrmode2 (imm or reg forms).
      if (isPre &&
          SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_PRE_IMM;
      } else if (!isPre &&
                  SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_POST_IMM;
      } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
      }
    }
  }

  if (Match) {
    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
      // The *_PRE_IMM forms take no separate offset-register operand.
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    } else {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    }
  }

  return false;
}
1646
// Try to select a Thumb1 post-incremented i32 load. Only a plain
// (non-extending) POST_INC load with a constant increment of exactly 4
// qualifies.
bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
      LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
    return false;

  auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
  if (!COffs || COffs->getZExtValue() != 4)
    return false;

  // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
  // The encoding of LDM is not how the rest of ISel expects a post-inc load to
  // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
  // ISel.
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
                   CurDAG->getRegister(0, MVT::i32), Chain };
  SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
                                       MVT::i32, MVT::Other, Ops);
  transferMemOperands(N, New);
  ReplaceNode(N, New);
  return true;
}
1673
// Try to select an indexed (pre/post-increment) Thumb2 load for N.
// Returns true and replaces N on success.
bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
  SDValue Offset;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
    // Pick the opcode by memory type, extension kind and pre/post flavour.
    switch (LoadedVT.getSimpleVT().SimpleTy) {
    case MVT::i32:
      Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
      break;
    case MVT::i16:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
      else
        Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
      break;
    case MVT::i8:
    case MVT::i1:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
      else
        Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
      break;
    default:
      return false;
    }
    Match = true;
  }

  if (Match) {
    SDValue Chain = LD->getChain();
    SDValue Base = LD->getBasePtr();
    SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
                     CurDAG->getRegister(0, MVT::i32), Chain };
    SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                         MVT::Other, Ops);
    transferMemOperands(N, New);
    ReplaceNode(N, New);
    return true;
  }

  return false;
}
1724
/// Try to select a pre- or post-indexed MVE vector load (plain or masked)
/// into one of the MVE_VLDR*_pre/_post instructions, which take a 7-bit
/// scaled immediate offset. Returns true and replaces N on success.
bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
  EVT LoadedVT;
  unsigned Opcode = 0;
  bool isSExtLd, isPre;
  Align Alignment;
  ARMVCC::VPTCodes Pred;
  SDValue PredReg;
  SDValue Chain, Base, Offset;

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    // Unpredicated load: "none" predicate code with a dummy register operand.
    Pred = ARMVCC::None;
    PredReg = CurDAG->getRegister(0, MVT::i32);
  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    // Masked load: predicate the instruction on the load's mask.
    Pred = ARMVCC::Then;
    PredReg = LD->getMask();
  } else
    llvm_unreachable("Expected a Load or a Masked Load!");

  // We allow LE non-masked loads to change the type (for example use a vldrb.8
  // as opposed to a vldrw.32). This can allow extra addressing modes or
  // alignments for what is otherwise an equivalent instruction.
  bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N);

  // Pick the opcode: extending loads use the width/signedness-specific forms;
  // full-width loads use the unsigned forms. The last argument to
  // SelectT2AddrModeImm7Offset is the shift applied to the imm7 offset
  // (scaled by the element size in memory).
  SDValue NewOffset;
  if (Alignment >= Align(2) && LoadedVT == MVT::v4i16 &&
      SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
  } else if (LoadedVT == MVT::v8i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
  } else if (LoadedVT == MVT::v4i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
  } else if (Alignment >= Align(4) &&
             (CanChangeType || LoadedVT == MVT::v4i32 ||
              LoadedVT == MVT::v4f32) &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2))
    Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
  else if (Alignment >= Align(2) &&
           (CanChangeType || LoadedVT == MVT::v8i16 ||
            LoadedVT == MVT::v8f16) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1))
    Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
  else if ((CanChangeType || LoadedVT == MVT::v16i8) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0))
    Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
  else
    return false;

  SDValue Ops[] = {Base, NewOffset,
                   CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32), PredReg,
                   Chain};
  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                       N->getValueType(0), MVT::Other, Ops);
  transferMemOperands(N, New);
  // The machine node's results are (writeback i32, loaded value, chain),
  // the opposite order of the load node's (value, writeback, chain).
  ReplaceUses(SDValue(N, 0), SDValue(New, 1));
  ReplaceUses(SDValue(N, 1), SDValue(New, 0));
  ReplaceUses(SDValue(N, 2), SDValue(New, 2));
  CurDAG->RemoveDeadNode(N);
  return true;
}
1821
1822 /// Form a GPRPair pseudo register from a pair of GPR regs.
createGPRPairNode(EVT VT,SDValue V0,SDValue V1)1823 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1824 SDLoc dl(V0.getNode());
1825 SDValue RegClass =
1826 CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1827 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1828 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1829 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1830 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1831 }
1832
1833 /// Form a D register from a pair of S registers.
createSRegPairNode(EVT VT,SDValue V0,SDValue V1)1834 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1835 SDLoc dl(V0.getNode());
1836 SDValue RegClass =
1837 CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1838 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1839 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1840 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1841 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1842 }
1843
1844 /// Form a quad register from a pair of D registers.
createDRegPairNode(EVT VT,SDValue V0,SDValue V1)1845 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1846 SDLoc dl(V0.getNode());
1847 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1848 MVT::i32);
1849 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1850 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1851 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1852 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1853 }
1854
1855 /// Form 4 consecutive D registers from a pair of Q registers.
createQRegPairNode(EVT VT,SDValue V0,SDValue V1)1856 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1857 SDLoc dl(V0.getNode());
1858 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1859 MVT::i32);
1860 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1861 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1862 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1863 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1864 }
1865
1866 /// Form 4 consecutive S registers.
createQuadSRegsNode(EVT VT,SDValue V0,SDValue V1,SDValue V2,SDValue V3)1867 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1868 SDValue V2, SDValue V3) {
1869 SDLoc dl(V0.getNode());
1870 SDValue RegClass =
1871 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1872 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1873 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1874 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1875 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1876 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1877 V2, SubReg2, V3, SubReg3 };
1878 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1879 }
1880
1881 /// Form 4 consecutive D registers.
createQuadDRegsNode(EVT VT,SDValue V0,SDValue V1,SDValue V2,SDValue V3)1882 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1883 SDValue V2, SDValue V3) {
1884 SDLoc dl(V0.getNode());
1885 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1886 MVT::i32);
1887 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1888 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1889 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1890 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1891 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1892 V2, SubReg2, V3, SubReg3 };
1893 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1894 }
1895
1896 /// Form 4 consecutive Q registers.
createQuadQRegsNode(EVT VT,SDValue V0,SDValue V1,SDValue V2,SDValue V3)1897 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1898 SDValue V2, SDValue V3) {
1899 SDLoc dl(V0.getNode());
1900 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1901 MVT::i32);
1902 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1903 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1904 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1905 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1906 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1907 V2, SubReg2, V3, SubReg3 };
1908 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1909 }
1910
1911 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1912 /// of a NEON VLD or VST instruction. The supported values depend on the
1913 /// number of registers being loaded.
GetVLDSTAlign(SDValue Align,const SDLoc & dl,unsigned NumVecs,bool is64BitVector)1914 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1915 unsigned NumVecs, bool is64BitVector) {
1916 unsigned NumRegs = NumVecs;
1917 if (!is64BitVector && NumVecs < 3)
1918 NumRegs *= 2;
1919
1920 unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1921 if (Alignment >= 32 && NumRegs == 4)
1922 Alignment = 32;
1923 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1924 Alignment = 16;
1925 else if (Alignment >= 8)
1926 Alignment = 8;
1927 else
1928 Alignment = 0;
1929
1930 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1931 }
1932
isVLDfixed(unsigned Opc)1933 static bool isVLDfixed(unsigned Opc)
1934 {
1935 switch (Opc) {
1936 default: return false;
1937 case ARM::VLD1d8wb_fixed : return true;
1938 case ARM::VLD1d16wb_fixed : return true;
1939 case ARM::VLD1d64Qwb_fixed : return true;
1940 case ARM::VLD1d32wb_fixed : return true;
1941 case ARM::VLD1d64wb_fixed : return true;
1942 case ARM::VLD1d64TPseudoWB_fixed : return true;
1943 case ARM::VLD1d64QPseudoWB_fixed : return true;
1944 case ARM::VLD1q8wb_fixed : return true;
1945 case ARM::VLD1q16wb_fixed : return true;
1946 case ARM::VLD1q32wb_fixed : return true;
1947 case ARM::VLD1q64wb_fixed : return true;
1948 case ARM::VLD1DUPd8wb_fixed : return true;
1949 case ARM::VLD1DUPd16wb_fixed : return true;
1950 case ARM::VLD1DUPd32wb_fixed : return true;
1951 case ARM::VLD1DUPq8wb_fixed : return true;
1952 case ARM::VLD1DUPq16wb_fixed : return true;
1953 case ARM::VLD1DUPq32wb_fixed : return true;
1954 case ARM::VLD2d8wb_fixed : return true;
1955 case ARM::VLD2d16wb_fixed : return true;
1956 case ARM::VLD2d32wb_fixed : return true;
1957 case ARM::VLD2q8PseudoWB_fixed : return true;
1958 case ARM::VLD2q16PseudoWB_fixed : return true;
1959 case ARM::VLD2q32PseudoWB_fixed : return true;
1960 case ARM::VLD2DUPd8wb_fixed : return true;
1961 case ARM::VLD2DUPd16wb_fixed : return true;
1962 case ARM::VLD2DUPd32wb_fixed : return true;
1963 }
1964 }
1965
isVSTfixed(unsigned Opc)1966 static bool isVSTfixed(unsigned Opc)
1967 {
1968 switch (Opc) {
1969 default: return false;
1970 case ARM::VST1d8wb_fixed : return true;
1971 case ARM::VST1d16wb_fixed : return true;
1972 case ARM::VST1d32wb_fixed : return true;
1973 case ARM::VST1d64wb_fixed : return true;
1974 case ARM::VST1q8wb_fixed : return true;
1975 case ARM::VST1q16wb_fixed : return true;
1976 case ARM::VST1q32wb_fixed : return true;
1977 case ARM::VST1q64wb_fixed : return true;
1978 case ARM::VST1d64TPseudoWB_fixed : return true;
1979 case ARM::VST1d64QPseudoWB_fixed : return true;
1980 case ARM::VST2d8wb_fixed : return true;
1981 case ARM::VST2d16wb_fixed : return true;
1982 case ARM::VST2d32wb_fixed : return true;
1983 case ARM::VST2q8PseudoWB_fixed : return true;
1984 case ARM::VST2q16PseudoWB_fixed : return true;
1985 case ARM::VST2q32PseudoWB_fixed : return true;
1986 }
1987 }
1988
// Get the register stride update opcode of a VLD/VST instruction that
// is otherwise equivalent to the given fixed stride updating instruction.
// Opcodes not present in this table are returned unchanged (the assert
// only guards that the input is some fixed-stride updating opcode).
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
  assert((isVLDfixed(Opc) || isVSTfixed(Opc))
         && "Incorrect fixed stride updating instruction.");
  switch (Opc) {
  default: break;
  case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
  case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
  case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
  case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
  case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
  case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
  case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
  case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
  case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
  case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
  case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
  case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
  case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
  case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
  case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
  case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
  case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
  case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;

  case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
  case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
  case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
  case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
  case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
  case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
  case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
  case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
  case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
  case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;

  case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
  case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
  case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
  case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
  case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
  case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;

  case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
  case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
  case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
  case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
  case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
  case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;

  case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
  case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
  case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
  }
  return Opc; // If not one we handle, return it unchanged.
}
2046
2047 /// Returns true if the given increment is a Constant known to be equal to the
2048 /// access size performed by a NEON load/store. This means the "[rN]!" form can
2049 /// be used.
isPerfectIncrement(SDValue Inc,EVT VecTy,unsigned NumVecs)2050 static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
2051 auto C = dyn_cast<ConstantSDNode>(Inc);
2052 return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
2053 }
2054
/// Select a NEON VLD1-4 (intrinsic, or updating/post-indexed form) for N.
/// DOpcodes, QOpcodes0 and QOpcodes1 are per-element-size opcode tables for
/// the D-register form, the Q-register (or even-half) form, and the odd-half
/// form used when a VLD3/VLD4 of Q registers is split in two.
void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  // Index into the opcode tables by element size.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  // Multi-vector results come back as one wide i64 super-register; a VLD3
  // result reserves a fourth (undef) slot so the class is a power of two.
  EVT ResTy;
  if (NumVecs == 1)
    ResTy = VT;
  else {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
  }
  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SDNode *VLd;
  SmallVector<SDValue, 7> Ops;

  // Double registers and VLD1/VLD2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVLDfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
      // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      } else if (!isVLDfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  } else {
    // Otherwise, quad registers are loaded with two separate instructions,
    // where one loads the even registers and the other loads the odd registers.
    EVT AddrTy = MemAddr.getValueType();

    // Load the even subregs. This is always an updating load, so that it
    // provides the address to the second load for the odd subregs.
    SDValue ImplDef =
      SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                          ResTy, AddrTy, MVT::Other, OpsA);
    Chain = SDValue(VLdA, 2);

    // Load the odd subregs, starting from the updated address (result 1 of
    // the even-half load) and merging into its partial result (result 0).
    Ops.push_back(SDValue(VLdA, 1));
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      assert(isa<ConstantSDNode>(Inc.getNode()) &&
             "only constant post-increment update allowed for VLD3/4");
      (void)Inc;
      Ops.push_back(Reg0);
    }
    Ops.push_back(SDValue(VLdA, 0));
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
  }

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});

  if (NumVecs == 1) {
    ReplaceNode(N, VLd);
    return;
  }

  // Extract out the subregisters.
  SDValue SuperReg = SDValue(VLd, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
  CurDAG->RemoveDeadNode(N);
}
2196
/// Select a NEON VST1-4 (intrinsic, or updating/post-indexed form) for N.
/// DOpcodes, QOpcodes0 and QOpcodes1 are per-element-size opcode tables for
/// the D-register form, the Q-register (or even-half) form, and the odd-half
/// form used when a VST3/VST4 of Q registers is split in two.
void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  // Index into the opcode tables by element size.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vst type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  std::vector<EVT> ResTys;
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SmallVector<SDValue, 7> Ops;

  // Double registers and VST1/VST2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    SDValue SrcReg;
    if (NumVecs == 1) {
      SrcReg = N->getOperand(Vec0Idx);
    } else if (is64BitVector) {
      // Form a REG_SEQUENCE to force register allocation.
      SDValue V0 = N->getOperand(Vec0Idx + 0);
      SDValue V1 = N->getOperand(Vec0Idx + 1);
      if (NumVecs == 2)
        SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
      else {
        SDValue V2 = N->getOperand(Vec0Idx + 2);
        // If it's a vst3, form a quad D-register and leave the last part as
        // an undef.
        SDValue V3 = (NumVecs == 3)
          ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
          : N->getOperand(Vec0Idx + 3);
        SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
      }
    } else {
      // Form a QQ register.
      SDValue Q0 = N->getOperand(Vec0Idx);
      SDValue Q1 = N->getOperand(Vec0Idx + 1);
      SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
    }

    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVSTfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
      }
      // VST1/VST2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      else if (!isVSTfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(SrcReg);
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

    // Transfer memoperands.
    CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});

    ReplaceNode(N, VSt);
    return;
  }

  // Otherwise, quad registers are stored with two separate instructions,
  // where one stores the even registers and the other stores the odd registers.

  // Form the QQQQ REG_SEQUENCE.
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  SDValue V2 = N->getOperand(Vec0Idx + 2);
  SDValue V3 = (NumVecs == 3)
    ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
    : N->getOperand(Vec0Idx + 3);
  SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);

  // Store the even D registers. This is always an updating store, so that it
  // provides the address to the second store for the odd subregs.
  const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
  SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                        MemAddr.getValueType(),
                                        MVT::Other, OpsA);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
  Chain = SDValue(VStA, 1);

  // Store the odd D registers, starting from the updated address (result 0
  // of the even-half store).
  Ops.push_back(SDValue(VStA, 0));
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    assert(isa<ConstantSDNode>(Inc.getNode()) &&
           "only constant post-increment update allowed for VST3/4");
    (void)Inc;
    Ops.push_back(Reg0);
  }
  Ops.push_back(RegSeq);
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);
  SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
                                        Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
  ReplaceNode(N, VStB);
}
2350
/// Select a NEON VLD2-4 lane or VST2-4 lane instruction (load/store of a
/// single lane across 2-4 vectors). DOpcodes/QOpcodes are the per-element-size
/// opcode tables for the D-register and Q-register forms.
void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                                      unsigned NumVecs,
                                      const uint16_t *DOpcodes,
                                      const uint16_t *QOpcodes) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  // The lane number is the operand following the NumVecs vector operands.
  unsigned Lane =
    cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();

  // Clamp the alignment operand to what the lane instruction can encode:
  // at most the total bytes accessed, a power of two, and 0 if below the
  // minimum useful alignment. VLD3/VST3 lane forms take no alignment.
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  // Index into the opcode tables by element size.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld/vst lane type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
    // Quad-register operations:
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 0; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 1; break;
  }

  // Loads produce one wide super-register (VLD3 reserves a fourth, undef
  // slot), plus the optional writeback i32 and the chain.
  std::vector<EVT> ResTys;
  if (IsLoad) {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
                                      MVT::i64, ResTyElts));
  }
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    bool IsImmUpdate =
      isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
    Ops.push_back(IsImmUpdate ? Reg0 : Inc);
  }

  // Bundle the input vectors into one super-register operand.
  SDValue SuperReg;
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  if (NumVecs == 2) {
    if (is64BitVector)
      SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
    else
      SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
  } else {
    SDValue V2 = N->getOperand(Vec0Idx + 2);
    SDValue V3 = (NumVecs == 3)
      ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
      : N->getOperand(Vec0Idx + 3);
    if (is64BitVector)
      SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
    else
      SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
  }
  Ops.push_back(SuperReg);
  Ops.push_back(getI32Imm(Lane, dl));
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                  QOpcodes[OpcodeIndex]);
  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
  if (!IsLoad) {
    ReplaceNode(N, VLdLn);
    return;
  }

  // Extract the subregisters.
  SuperReg = SDValue(VLdLn, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
  CurDAG->RemoveDeadNode(N);
}
2480
2481 template <typename SDValueVector>
AddMVEPredicateToOps(SDValueVector & Ops,SDLoc Loc,SDValue PredicateMask)2482 void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2483 SDValue PredicateMask) {
2484 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
2485 Ops.push_back(PredicateMask);
2486 }
2487
2488 template <typename SDValueVector>
AddMVEPredicateToOps(SDValueVector & Ops,SDLoc Loc,SDValue PredicateMask,SDValue Inactive)2489 void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2490 SDValue PredicateMask,
2491 SDValue Inactive) {
2492 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
2493 Ops.push_back(PredicateMask);
2494 Ops.push_back(Inactive);
2495 }
2496
2497 template <typename SDValueVector>
AddEmptyMVEPredicateToOps(SDValueVector & Ops,SDLoc Loc)2498 void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) {
2499 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
2500 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2501 }
2502
2503 template <typename SDValueVector>
AddEmptyMVEPredicateToOps(SDValueVector & Ops,SDLoc Loc,EVT InactiveTy)2504 void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2505 EVT InactiveTy) {
2506 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
2507 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2508 Ops.push_back(SDValue(
2509 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, InactiveTy), 0));
2510 }
2511
SelectMVE_WB(SDNode * N,const uint16_t * Opcodes,bool Predicated)2512 void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes,
2513 bool Predicated) {
2514 SDLoc Loc(N);
2515 SmallVector<SDValue, 8> Ops;
2516
2517 uint16_t Opcode;
2518 switch (N->getValueType(1).getVectorElementType().getSizeInBits()) {
2519 case 32:
2520 Opcode = Opcodes[0];
2521 break;
2522 case 64:
2523 Opcode = Opcodes[1];
2524 break;
2525 default:
2526 llvm_unreachable("bad vector element size in SelectMVE_WB");
2527 }
2528
2529 Ops.push_back(N->getOperand(2)); // vector of base addresses
2530
2531 int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
2532 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate offset
2533
2534 if (Predicated)
2535 AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
2536 else
2537 AddEmptyMVEPredicateToOps(Ops, Loc);
2538
2539 Ops.push_back(N->getOperand(0)); // chain
2540
2541 SmallVector<EVT, 8> VTs;
2542 VTs.push_back(N->getValueType(1));
2543 VTs.push_back(N->getValueType(0));
2544 VTs.push_back(N->getValueType(2));
2545
2546 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), VTs, Ops);
2547 ReplaceUses(SDValue(N, 0), SDValue(New, 1));
2548 ReplaceUses(SDValue(N, 1), SDValue(New, 0));
2549 ReplaceUses(SDValue(N, 2), SDValue(New, 2));
2550 CurDAG->RemoveDeadNode(N);
2551 }
2552
SelectMVE_LongShift(SDNode * N,uint16_t Opcode,bool Immediate,bool HasSaturationOperand)2553 void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,
2554 bool Immediate,
2555 bool HasSaturationOperand) {
2556 SDLoc Loc(N);
2557 SmallVector<SDValue, 8> Ops;
2558
2559 // Two 32-bit halves of the value to be shifted
2560 Ops.push_back(N->getOperand(1));
2561 Ops.push_back(N->getOperand(2));
2562
2563 // The shift count
2564 if (Immediate) {
2565 int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
2566 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
2567 } else {
2568 Ops.push_back(N->getOperand(3));
2569 }
2570
2571 // The immediate saturation operand, if any
2572 if (HasSaturationOperand) {
2573 int32_t SatOp = cast<ConstantSDNode>(N->getOperand(4))->getZExtValue();
2574 int SatBit = (SatOp == 64 ? 0 : 1);
2575 Ops.push_back(getI32Imm(SatBit, Loc));
2576 }
2577
2578 // MVE scalar shifts are IT-predicable, so include the standard
2579 // predicate arguments.
2580 Ops.push_back(getAL(CurDAG, Loc));
2581 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2582
2583 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
2584 }
2585
SelectMVE_VADCSBC(SDNode * N,uint16_t OpcodeWithCarry,uint16_t OpcodeWithNoCarry,bool Add,bool Predicated)2586 void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
2587 uint16_t OpcodeWithNoCarry,
2588 bool Add, bool Predicated) {
2589 SDLoc Loc(N);
2590 SmallVector<SDValue, 8> Ops;
2591 uint16_t Opcode;
2592
2593 unsigned FirstInputOp = Predicated ? 2 : 1;
2594
2595 // Two input vectors and the input carry flag
2596 Ops.push_back(N->getOperand(FirstInputOp));
2597 Ops.push_back(N->getOperand(FirstInputOp + 1));
2598 SDValue CarryIn = N->getOperand(FirstInputOp + 2);
2599 ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(CarryIn);
2600 uint32_t CarryMask = 1 << 29;
2601 uint32_t CarryExpected = Add ? 0 : CarryMask;
2602 if (CarryInConstant &&
2603 (CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) {
2604 Opcode = OpcodeWithNoCarry;
2605 } else {
2606 Ops.push_back(CarryIn);
2607 Opcode = OpcodeWithCarry;
2608 }
2609
2610 if (Predicated)
2611 AddMVEPredicateToOps(Ops, Loc,
2612 N->getOperand(FirstInputOp + 3), // predicate
2613 N->getOperand(FirstInputOp - 1)); // inactive
2614 else
2615 AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));
2616
2617 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
2618 }
2619
SelectMVE_VSHLC(SDNode * N,bool Predicated)2620 void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) {
2621 SDLoc Loc(N);
2622 SmallVector<SDValue, 8> Ops;
2623
2624 // One vector input, followed by a 32-bit word of bits to shift in
2625 // and then an immediate shift count
2626 Ops.push_back(N->getOperand(1));
2627 Ops.push_back(N->getOperand(2));
2628 int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
2629 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
2630
2631 if (Predicated)
2632 AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
2633 else
2634 AddEmptyMVEPredicateToOps(Ops, Loc);
2635
2636 CurDAG->SelectNodeTo(N, ARM::MVE_VSHLC, N->getVTList(), makeArrayRef(Ops));
2637 }
2638
SDValueToConstBool(SDValue SDVal)2639 static bool SDValueToConstBool(SDValue SDVal) {
2640 assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant");
2641 ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal);
2642 uint64_t Value = SDValConstant->getZExtValue();
2643 assert((Value == 0 || Value == 1) && "expected value 0 or 1");
2644 return Value;
2645 }
2646
SelectBaseMVE_VMLLDAV(SDNode * N,bool Predicated,const uint16_t * OpcodesS,const uint16_t * OpcodesU,size_t Stride,size_t TySize)2647 void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
2648 const uint16_t *OpcodesS,
2649 const uint16_t *OpcodesU,
2650 size_t Stride, size_t TySize) {
2651 assert(TySize < Stride && "Invalid TySize");
2652 bool IsUnsigned = SDValueToConstBool(N->getOperand(1));
2653 bool IsSub = SDValueToConstBool(N->getOperand(2));
2654 bool IsExchange = SDValueToConstBool(N->getOperand(3));
2655 if (IsUnsigned) {
2656 assert(!IsSub &&
2657 "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist");
2658 assert(!IsExchange &&
2659 "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist");
2660 }
2661
2662 auto OpIsZero = [N](size_t OpNo) {
2663 if (ConstantSDNode *OpConst = dyn_cast<ConstantSDNode>(N->getOperand(OpNo)))
2664 if (OpConst->getZExtValue() == 0)
2665 return true;
2666 return false;
2667 };
2668
2669 // If the input accumulator value is not zero, select an instruction with
2670 // accumulator, otherwise select an instruction without accumulator
2671 bool IsAccum = !(OpIsZero(4) && OpIsZero(5));
2672
2673 const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS;
2674 if (IsSub)
2675 Opcodes += 4 * Stride;
2676 if (IsExchange)
2677 Opcodes += 2 * Stride;
2678 if (IsAccum)
2679 Opcodes += Stride;
2680 uint16_t Opcode = Opcodes[TySize];
2681
2682 SDLoc Loc(N);
2683 SmallVector<SDValue, 8> Ops;
2684 // Push the accumulator operands, if they are used
2685 if (IsAccum) {
2686 Ops.push_back(N->getOperand(4));
2687 Ops.push_back(N->getOperand(5));
2688 }
2689 // Push the two vector operands
2690 Ops.push_back(N->getOperand(6));
2691 Ops.push_back(N->getOperand(7));
2692
2693 if (Predicated)
2694 AddMVEPredicateToOps(Ops, Loc, N->getOperand(8));
2695 else
2696 AddEmptyMVEPredicateToOps(Ops, Loc);
2697
2698 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
2699 }
2700
SelectMVE_VMLLDAV(SDNode * N,bool Predicated,const uint16_t * OpcodesS,const uint16_t * OpcodesU)2701 void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated,
2702 const uint16_t *OpcodesS,
2703 const uint16_t *OpcodesU) {
2704 EVT VecTy = N->getOperand(6).getValueType();
2705 size_t SizeIndex;
2706 switch (VecTy.getVectorElementType().getSizeInBits()) {
2707 case 16:
2708 SizeIndex = 0;
2709 break;
2710 case 32:
2711 SizeIndex = 1;
2712 break;
2713 default:
2714 llvm_unreachable("bad vector element size");
2715 }
2716
2717 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 2, SizeIndex);
2718 }
2719
void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated,
                                          const uint16_t *OpcodesS,
                                          const uint16_t *OpcodesU) {
  // VRMLLDAVH only handles 32-bit-element vectors, so each opcode table holds
  // a single entry per variant: Stride = 1, TySize = 0.
  assert(
      N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() ==
          32 &&
      "bad vector element size");
  SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 1, 0);
}
2729
SelectMVE_VLD(SDNode * N,unsigned NumVecs,const uint16_t * const * Opcodes,bool HasWriteback)2730 void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs,
2731 const uint16_t *const *Opcodes,
2732 bool HasWriteback) {
2733 EVT VT = N->getValueType(0);
2734 SDLoc Loc(N);
2735
2736 const uint16_t *OurOpcodes;
2737 switch (VT.getVectorElementType().getSizeInBits()) {
2738 case 8:
2739 OurOpcodes = Opcodes[0];
2740 break;
2741 case 16:
2742 OurOpcodes = Opcodes[1];
2743 break;
2744 case 32:
2745 OurOpcodes = Opcodes[2];
2746 break;
2747 default:
2748 llvm_unreachable("bad vector element size in SelectMVE_VLD");
2749 }
2750
2751 EVT DataTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs * 2);
2752 SmallVector<EVT, 4> ResultTys = {DataTy, MVT::Other};
2753 unsigned PtrOperand = HasWriteback ? 1 : 2;
2754
2755 auto Data = SDValue(
2756 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, DataTy), 0);
2757 SDValue Chain = N->getOperand(0);
2758 // Add a MVE_VLDn instruction for each Vec, except the last
2759 for (unsigned Stage = 0; Stage < NumVecs - 1; ++Stage) {
2760 SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
2761 auto LoadInst =
2762 CurDAG->getMachineNode(OurOpcodes[Stage], Loc, ResultTys, Ops);
2763 Data = SDValue(LoadInst, 0);
2764 Chain = SDValue(LoadInst, 1);
2765 }
2766 // The last may need a writeback on it
2767 if (HasWriteback)
2768 ResultTys = {DataTy, MVT::i32, MVT::Other};
2769 SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
2770 auto LoadInst =
2771 CurDAG->getMachineNode(OurOpcodes[NumVecs - 1], Loc, ResultTys, Ops);
2772
2773 unsigned i;
2774 for (i = 0; i < NumVecs; i++)
2775 ReplaceUses(SDValue(N, i),
2776 CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT,
2777 SDValue(LoadInst, 0)));
2778 if (HasWriteback)
2779 ReplaceUses(SDValue(N, i++), SDValue(LoadInst, 1));
2780 ReplaceUses(SDValue(N, i), SDValue(LoadInst, HasWriteback ? 2 : 1));
2781 CurDAG->RemoveDeadNode(N);
2782 }
2783
SelectMVE_VxDUP(SDNode * N,const uint16_t * Opcodes,bool Wrapping,bool Predicated)2784 void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
2785 bool Wrapping, bool Predicated) {
2786 EVT VT = N->getValueType(0);
2787 SDLoc Loc(N);
2788
2789 uint16_t Opcode;
2790 switch (VT.getScalarSizeInBits()) {
2791 case 8:
2792 Opcode = Opcodes[0];
2793 break;
2794 case 16:
2795 Opcode = Opcodes[1];
2796 break;
2797 case 32:
2798 Opcode = Opcodes[2];
2799 break;
2800 default:
2801 llvm_unreachable("bad vector element size in SelectMVE_VxDUP");
2802 }
2803
2804 SmallVector<SDValue, 8> Ops;
2805 unsigned OpIdx = 1;
2806
2807 SDValue Inactive;
2808 if (Predicated)
2809 Inactive = N->getOperand(OpIdx++);
2810
2811 Ops.push_back(N->getOperand(OpIdx++)); // base
2812 if (Wrapping)
2813 Ops.push_back(N->getOperand(OpIdx++)); // limit
2814
2815 SDValue ImmOp = N->getOperand(OpIdx++); // step
2816 int ImmValue = cast<ConstantSDNode>(ImmOp)->getZExtValue();
2817 Ops.push_back(getI32Imm(ImmValue, Loc));
2818
2819 if (Predicated)
2820 AddMVEPredicateToOps(Ops, Loc, N->getOperand(OpIdx), Inactive);
2821 else
2822 AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));
2823
2824 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
2825 }
2826
SelectCDE_CXxD(SDNode * N,uint16_t Opcode,size_t NumExtraOps,bool HasAccum)2827 void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode,
2828 size_t NumExtraOps, bool HasAccum) {
2829 bool IsBigEndian = CurDAG->getDataLayout().isBigEndian();
2830 SDLoc Loc(N);
2831 SmallVector<SDValue, 8> Ops;
2832
2833 unsigned OpIdx = 1;
2834
2835 // Convert and append the immediate operand designating the coprocessor.
2836 SDValue ImmCorpoc = N->getOperand(OpIdx++);
2837 uint32_t ImmCoprocVal = cast<ConstantSDNode>(ImmCorpoc)->getZExtValue();
2838 Ops.push_back(getI32Imm(ImmCoprocVal, Loc));
2839
2840 // For accumulating variants copy the low and high order parts of the
2841 // accumulator into a register pair and add it to the operand vector.
2842 if (HasAccum) {
2843 SDValue AccLo = N->getOperand(OpIdx++);
2844 SDValue AccHi = N->getOperand(OpIdx++);
2845 if (IsBigEndian)
2846 std::swap(AccLo, AccHi);
2847 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, AccLo, AccHi), 0));
2848 }
2849
2850 // Copy extra operands as-is.
2851 for (size_t I = 0; I < NumExtraOps; I++)
2852 Ops.push_back(N->getOperand(OpIdx++));
2853
2854 // Convert and append the immediate operand
2855 SDValue Imm = N->getOperand(OpIdx);
2856 uint32_t ImmVal = cast<ConstantSDNode>(Imm)->getZExtValue();
2857 Ops.push_back(getI32Imm(ImmVal, Loc));
2858
2859 // Accumulating variants are IT-predicable, add predicate operands.
2860 if (HasAccum) {
2861 SDValue Pred = getAL(CurDAG, Loc);
2862 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2863 Ops.push_back(Pred);
2864 Ops.push_back(PredReg);
2865 }
2866
2867 // Create the CDE intruction
2868 SDNode *InstrNode = CurDAG->getMachineNode(Opcode, Loc, MVT::Untyped, Ops);
2869 SDValue ResultPair = SDValue(InstrNode, 0);
2870
2871 // The original intrinsic had two outputs, and the output of the dual-register
2872 // CDE instruction is a register pair. We need to extract the two subregisters
2873 // and replace all uses of the original outputs with the extracted
2874 // subregisters.
2875 uint16_t SubRegs[2] = {ARM::gsub_0, ARM::gsub_1};
2876 if (IsBigEndian)
2877 std::swap(SubRegs[0], SubRegs[1]);
2878
2879 for (size_t ResIdx = 0; ResIdx < 2; ResIdx++) {
2880 if (SDValue(N, ResIdx).use_empty())
2881 continue;
2882 SDValue SubReg = CurDAG->getTargetExtractSubreg(SubRegs[ResIdx], Loc,
2883 MVT::i32, ResultPair);
2884 ReplaceUses(SDValue(N, ResIdx), SubReg);
2885 }
2886
2887 CurDAG->RemoveDeadNode(N);
2888 }
2889
/// Select a NEON load-and-duplicate (VLDn-dup): load element(s) and replicate
/// into all lanes of NumVecs destination vectors.  DOpcodes covers 64-bit
/// destination vectors; QOpcodes0/QOpcodes1 cover 128-bit destinations, which
/// (except for NumVecs == 1) are emitted as two chained instructions.
void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
                                   bool isUpdating, unsigned NumVecs,
                                   const uint16_t *DOpcodes,
                                   const uint16_t *QOpcodes0,
                                   const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  // Intrinsic nodes carry (chain, intrinsic-id, addr, ...); other nodes have
  // the address at operand 1.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();

  // Normalize the alignment operand: clamp it to the total transfer size and
  // force it to a power of two, with 0 meaning "no alignment specified".
  // For NumVecs == 3 the alignment is left as 0.
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  // Index into the opcode tables by element size.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld-dup type");
  case MVT::v8i8:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v4i16:
  case MVT::v8i16:
  case MVT::v4f16:
  case MVT::v8f16:
  case MVT::v4bf16:
  case MVT::v8bf16:
    OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32:
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v1f64:
  case MVT::v1i64: OpcodeIndex = 3; break;
  }

  // Model the whole destination register tuple as a single wide vector of
  // i64s.  NumVecs == 3 is padded to 4 elements (no 3-register tuple type),
  // and 128-bit destinations double the element count.
  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
  if (!is64BitVector)
    ResTyElts *= 2;
  EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);

  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32); // updated address for post-increment forms
  ResTys.push_back(MVT::Other); // chain

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SDNode *VLdDup;
  if (is64BitVector || NumVecs == 1) {
    // Single-instruction case: one entry from DOpcodes or QOpcodes0.
    SmallVector<SDValue, 6> Ops;
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex] :
                                   QOpcodes0[OpcodeIndex];
    if (isUpdating) {
      // fixed-stride update instructions don't have an explicit writeback
      // operand. It's implicit in the opcode itself.
      SDValue Inc = N->getOperand(2);
      bool IsImmUpdate =
          isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
      if (NumVecs <= 2 && !IsImmUpdate)
        Opc = getVLDSTRegisterUpdateOpcode(Opc);
      if (!IsImmUpdate)
        Ops.push_back(Inc);
      // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
      else if (NumVecs > 2)
        Ops.push_back(Reg0);
    }
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  } else if (NumVecs == 2) {
    // 128-bit, two vectors: emit the QOpcodes0 instruction followed by the
    // QOpcodes1 instruction, chained through the first's chain result.
    const SDValue OpsA[] = { MemAddr, Align, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
                                          dl, ResTys, OpsA);

    Chain = SDValue(VLdA, 1);
    const SDValue OpsB[] = { MemAddr, Align, Pred, Reg0, Chain };
    VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
  } else {
    // 128-bit, three or four vectors: as above, but the second instruction
    // also takes the first's tuple result as an input, seeded here with an
    // IMPLICIT_DEF for the first instruction.
    SDValue ImplDef =
        SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = { MemAddr, Align, ImplDef, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
                                          dl, ResTys, OpsA);

    SDValue SuperReg = SDValue(VLdA, 0);
    Chain = SDValue(VLdA, 1);
    const SDValue OpsB[] = { MemAddr, Align, SuperReg, Pred, Reg0, Chain };
    VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
  }

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});

  // Extract the subregisters: each of the node's first NumVecs results is one
  // d- or q-register slice of the tuple result.
  if (NumVecs == 1) {
    ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
  } else {
    SDValue SuperReg = SDValue(VLdDup, 0);
    static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
    unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
    for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
      ReplaceUses(SDValue(N, Vec),
                  CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
    }
  }
  // Rewire the chain and, for updating forms, the updated-address result.
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
  CurDAG->RemoveDeadNode(N);
}
3024
/// Try to select N as a v6T2 bitfield extract (SBFX/UBFX, or a cheaper plain
/// shift when the field reaches the top of the word).  Handles four shapes:
/// and-of-srl, srl-of-shl, srl-of-and, and sign_extend_inreg-of-shift.
/// Returns true if N was replaced.
bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
  // SBFX/UBFX only exist from ARMv6T2 onwards.
  if (!Subtarget->hasV6T2Ops())
    return false;

  unsigned Opc = isSigned
    ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
    : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
  SDLoc dl(N);

  // For unsigned extracts, check for a shift right and mask
  unsigned And_imm = 0;
  if (N->getOpcode() == ISD::AND) {
    if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {

      // The immediate is a mask of the low bits iff imm & (imm+1) == 0
      if (And_imm & (And_imm + 1))
        return false;

      unsigned Srl_imm = 0;
      if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
                                Srl_imm)) {
        assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");

        // Mask off the unnecessary bits of the AND immediate; normally
        // DAGCombine will do this, but that might not happen if
        // targetShrinkDemandedConstant chooses a different immediate.
        And_imm &= -1U >> Srl_imm;

        // Note: The width operand is encoded as width-1.
        unsigned Width = countTrailingOnes(And_imm) - 1;
        unsigned LSB = Srl_imm;

        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

        // If the field extends to the top of the 32-bit value, a single
        // shift (arithmetic or logical by LSB) extracts it.
        if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
          // It's cheaper to use a right shift to extract the top bits.
          if (Subtarget->isThumb()) {
            Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
            SDValue Ops[] = { N->getOperand(0).getOperand(0),
                              CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                              getAL(CurDAG, dl), Reg0, Reg0 };
            CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
            return true;
          }

          // ARM models shift instructions as MOVsi with shifter operand.
          ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
          SDValue ShOpc =
            CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
                                      MVT::i32);
          SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
                            getAL(CurDAG, dl), Reg0, Reg0 };
          CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
          return true;
        }

        // General case: UBFX/SBFX with explicit lsb and width-1 operands.
        assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
        SDValue Ops[] = { N->getOperand(0).getOperand(0),
                          CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                          CurDAG->getTargetConstant(Width, dl, MVT::i32),
                          getAL(CurDAG, dl), Reg0 };
        CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
        return true;
      }
    }
    return false;
  }

  // Otherwise, we're looking for a shift of a shift: srl (shl X, C1), C2
  // extracts the field of width 32-C2 starting at bit C2-C1.
  unsigned Shl_imm = 0;
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
    assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
    unsigned Srl_imm = 0;
    if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      // Note: The width operand is encoded as width-1.
      unsigned Width = 32 - Srl_imm - 1;
      int LSB = Srl_imm - Shl_imm;
      // A negative LSB would mean the field starts below bit 0 — not a
      // bitfield extract.
      if (LSB < 0)
        return false;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  // Or we are looking for a shift of an and, with a mask operand:
  // srl (and X, Mask), C where Mask is a contiguous run of ones whose low
  // bit lines up with the shift amount.
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
      isShiftedMask_32(And_imm)) {
    unsigned Srl_imm = 0;
    unsigned LSB = countTrailingZeros(And_imm);
    // Shift must be the same as the ands lsb
    if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      unsigned MSB = 31 - countLeadingZeros(And_imm);
      // Note: The width operand is encoded as width-1.
      unsigned Width = MSB - LSB;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  // Finally, sign_extend_inreg of a right shift: the extension width plus the
  // shift amount describe the field directly.
  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    unsigned LSB = 0;
    if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
        !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
      return false;

    if (LSB + Width > 32)
      return false;

    SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
    assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
    SDValue Ops[] = { N->getOperand(0).getOperand(0),
                      CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                      CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
                      getAL(CurDAG, dl), Reg0 };
    CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
    return true;
  }

  return false;
}
3160
3161 /// Target-specific DAG combining for ISD::XOR.
3162 /// Target-independent combining lowers SELECT_CC nodes of the form
3163 /// select_cc setg[ge] X, 0, X, -X
3164 /// select_cc setgt X, -1, X, -X
3165 /// select_cc setl[te] X, 0, -X, X
3166 /// select_cc setlt X, 1, -X, X
3167 /// which represent Integer ABS into:
3168 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
3169 /// ARM instruction selection detects the latter and matches it to
3170 /// ARM::ABS or ARM::t2ABS machine node.
tryABSOp(SDNode * N)3171 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
3172 SDValue XORSrc0 = N->getOperand(0);
3173 SDValue XORSrc1 = N->getOperand(1);
3174 EVT VT = N->getValueType(0);
3175
3176 if (Subtarget->isThumb1Only())
3177 return false;
3178
3179 if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
3180 return false;
3181
3182 SDValue ADDSrc0 = XORSrc0.getOperand(0);
3183 SDValue ADDSrc1 = XORSrc0.getOperand(1);
3184 SDValue SRASrc0 = XORSrc1.getOperand(0);
3185 SDValue SRASrc1 = XORSrc1.getOperand(1);
3186 ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
3187 EVT XType = SRASrc0.getValueType();
3188 unsigned Size = XType.getSizeInBits() - 1;
3189
3190 if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
3191 XType.isInteger() && SRAConstant != nullptr &&
3192 Size == SRAConstant->getZExtValue()) {
3193 unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
3194 CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
3195 return true;
3196 }
3197
3198 return false;
3199 }
3200
3201 /// We've got special pseudo-instructions for these
SelectCMP_SWAP(SDNode * N)3202 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
3203 unsigned Opcode;
3204 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
3205 if (MemTy == MVT::i8)
3206 Opcode = ARM::CMP_SWAP_8;
3207 else if (MemTy == MVT::i16)
3208 Opcode = ARM::CMP_SWAP_16;
3209 else if (MemTy == MVT::i32)
3210 Opcode = ARM::CMP_SWAP_32;
3211 else
3212 llvm_unreachable("Unknown AtomicCmpSwap type");
3213
3214 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
3215 N->getOperand(0)};
3216 SDNode *CmpSwap = CurDAG->getMachineNode(
3217 Opcode, SDLoc(N),
3218 CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
3219
3220 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
3221 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
3222
3223 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
3224 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
3225 CurDAG->RemoveDeadNode(N);
3226 }
3227
3228 static Optional<std::pair<unsigned, unsigned>>
getContiguousRangeOfSetBits(const APInt & A)3229 getContiguousRangeOfSetBits(const APInt &A) {
3230 unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
3231 unsigned LastOne = A.countTrailingZeros();
3232 if (A.countPopulation() != (FirstOne - LastOne + 1))
3233 return Optional<std::pair<unsigned,unsigned>>();
3234 return std::make_pair(FirstOne, LastOne);
3235 }
3236
SelectCMPZ(SDNode * N,bool & SwitchEQNEToPLMI)3237 void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
3238 assert(N->getOpcode() == ARMISD::CMPZ);
3239 SwitchEQNEToPLMI = false;
3240
3241 if (!Subtarget->isThumb())
3242 // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
3243 // LSR don't exist as standalone instructions - they need the barrel shifter.
3244 return;
3245
3246 // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
3247 SDValue And = N->getOperand(0);
3248 if (!And->hasOneUse())
3249 return;
3250
3251 SDValue Zero = N->getOperand(1);
3252 if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() ||
3253 And->getOpcode() != ISD::AND)
3254 return;
3255 SDValue X = And.getOperand(0);
3256 auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));
3257
3258 if (!C)
3259 return;
3260 auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
3261 if (!Range)
3262 return;
3263
3264 // There are several ways to lower this:
3265 SDNode *NewN;
3266 SDLoc dl(N);
3267
3268 auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
3269 if (Subtarget->isThumb2()) {
3270 Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
3271 SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
3272 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3273 CurDAG->getRegister(0, MVT::i32) };
3274 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3275 } else {
3276 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
3277 CurDAG->getTargetConstant(Imm, dl, MVT::i32),
3278 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
3279 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3280 }
3281 };
3282
3283 if (Range->second == 0) {
3284 // 1. Mask includes the LSB -> Simply shift the top N bits off
3285 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
3286 ReplaceNode(And.getNode(), NewN);
3287 } else if (Range->first == 31) {
3288 // 2. Mask includes the MSB -> Simply shift the bottom N bits off
3289 NewN = EmitShift(ARM::tLSRri, X, Range->second);
3290 ReplaceNode(And.getNode(), NewN);
3291 } else if (Range->first == Range->second) {
3292 // 3. Only one bit is set. We can shift this into the sign bit and use a
3293 // PL/MI comparison.
3294 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
3295 ReplaceNode(And.getNode(), NewN);
3296
3297 SwitchEQNEToPLMI = true;
3298 } else if (!Subtarget->hasV6T2Ops()) {
3299 // 4. Do a double shift to clear bottom and top bits, but only in
3300 // thumb-1 mode as in thumb-2 we can use UBFX.
3301 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
3302 NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
3303 Range->second + (31 - Range->first));
3304 ReplaceNode(And.getNode(), NewN);
3305 }
3306
3307 }
3308
3309 void ARMDAGToDAGISel::Select(SDNode *N) {
3310 SDLoc dl(N);
3311
3312 if (N->isMachineOpcode()) {
3313 N->setNodeId(-1);
3314 return; // Already selected.
3315 }
3316
3317 switch (N->getOpcode()) {
3318 default: break;
3319 case ISD::STORE: {
3320 // For Thumb1, match an sp-relative store in C++. This is a little
3321 // unfortunate, but I don't think I can make the chain check work
3322 // otherwise. (The chain of the store has to be the same as the chain
3323 // of the CopyFromReg, or else we can't replace the CopyFromReg with
3324 // a direct reference to "SP".)
3325 //
3326 // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
3327 // a different addressing mode from other four-byte stores.
3328 //
3329 // This pattern usually comes up with call arguments.
3330 StoreSDNode *ST = cast<StoreSDNode>(N);
3331 SDValue Ptr = ST->getBasePtr();
3332 if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
3333 int RHSC = 0;
3334 if (Ptr.getOpcode() == ISD::ADD &&
3335 isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))
3336 Ptr = Ptr.getOperand(0);
3337
3338 if (Ptr.getOpcode() == ISD::CopyFromReg &&
3339 cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
3340 Ptr.getOperand(0) == ST->getChain()) {
3341 SDValue Ops[] = {ST->getValue(),
3342 CurDAG->getRegister(ARM::SP, MVT::i32),
3343 CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
3344 getAL(CurDAG, dl),
3345 CurDAG->getRegister(0, MVT::i32),
3346 ST->getChain()};
3347 MachineSDNode *ResNode =
3348 CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
3349 MachineMemOperand *MemOp = ST->getMemOperand();
3350 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
3351 ReplaceNode(N, ResNode);
3352 return;
3353 }
3354 }
3355 break;
3356 }
3357 case ISD::WRITE_REGISTER:
3358 if (tryWriteRegister(N))
3359 return;
3360 break;
3361 case ISD::READ_REGISTER:
3362 if (tryReadRegister(N))
3363 return;
3364 break;
3365 case ISD::INLINEASM:
3366 case ISD::INLINEASM_BR:
3367 if (tryInlineAsm(N))
3368 return;
3369 break;
3370 case ISD::XOR:
3371 // Select special operations if XOR node forms integer ABS pattern
3372 if (tryABSOp(N))
3373 return;
3374 // Other cases are autogenerated.
3375 break;
3376 case ISD::Constant: {
3377 unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
3378 // If we can't materialize the constant we need to use a literal pool
3379 if (ConstantMaterializationCost(Val, Subtarget) > 2) {
3380 SDValue CPIdx = CurDAG->getTargetConstantPool(
3381 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
3382 TLI->getPointerTy(CurDAG->getDataLayout()));
3383
3384 SDNode *ResNode;
3385 if (Subtarget->isThumb()) {
3386 SDValue Ops[] = {
3387 CPIdx,
3388 getAL(CurDAG, dl),
3389 CurDAG->getRegister(0, MVT::i32),
3390 CurDAG->getEntryNode()
3391 };
3392 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
3393 Ops);
3394 } else {
3395 SDValue Ops[] = {
3396 CPIdx,
3397 CurDAG->getTargetConstant(0, dl, MVT::i32),
3398 getAL(CurDAG, dl),
3399 CurDAG->getRegister(0, MVT::i32),
3400 CurDAG->getEntryNode()
3401 };
3402 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
3403 Ops);
3404 }
3405 // Annotate the Node with memory operand information so that MachineInstr
3406 // queries work properly. This e.g. gives the register allocation the
3407 // required information for rematerialization.
3408 MachineFunction& MF = CurDAG->getMachineFunction();
3409 MachineMemOperand *MemOp =
3410 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
3411 MachineMemOperand::MOLoad, 4, Align(4));
3412
3413 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
3414
3415 ReplaceNode(N, ResNode);
3416 return;
3417 }
3418
3419 // Other cases are autogenerated.
3420 break;
3421 }
3422 case ISD::FrameIndex: {
3423 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
3424 int FI = cast<FrameIndexSDNode>(N)->getIndex();
3425 SDValue TFI = CurDAG->getTargetFrameIndex(
3426 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
3427 if (Subtarget->isThumb1Only()) {
3428 // Set the alignment of the frame object to 4, to avoid having to generate
3429 // more than one ADD
3430 MachineFrameInfo &MFI = MF->getFrameInfo();
3431 if (MFI.getObjectAlign(FI) < Align(4))
3432 MFI.setObjectAlignment(FI, Align(4));
3433 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
3434 CurDAG->getTargetConstant(0, dl, MVT::i32));
3435 return;
3436 } else {
3437 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
3438 ARM::t2ADDri : ARM::ADDri);
3439 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
3440 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3441 CurDAG->getRegister(0, MVT::i32) };
3442 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3443 return;
3444 }
3445 }
3446 case ISD::SRL:
3447 if (tryV6T2BitfieldExtractOp(N, false))
3448 return;
3449 break;
3450 case ISD::SIGN_EXTEND_INREG:
3451 case ISD::SRA:
3452 if (tryV6T2BitfieldExtractOp(N, true))
3453 return;
3454 break;
3455 case ISD::MUL:
3456 if (Subtarget->isThumb1Only())
3457 break;
3458 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
3459 unsigned RHSV = C->getZExtValue();
3460 if (!RHSV) break;
3461 if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
3462 unsigned ShImm = Log2_32(RHSV-1);
3463 if (ShImm >= 32)
3464 break;
3465 SDValue V = N->getOperand(0);
3466 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
3467 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
3468 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3469 if (Subtarget->isThumb()) {
3470 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3471 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
3472 return;
3473 } else {
3474 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3475 Reg0 };
3476 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
3477 return;
3478 }
3479 }
3480 if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
3481 unsigned ShImm = Log2_32(RHSV+1);
3482 if (ShImm >= 32)
3483 break;
3484 SDValue V = N->getOperand(0);
3485 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
3486 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
3487 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3488 if (Subtarget->isThumb()) {
3489 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3490 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
3491 return;
3492 } else {
3493 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3494 Reg0 };
3495 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
3496 return;
3497 }
3498 }
3499 }
3500 break;
3501 case ISD::AND: {
3502 // Check for unsigned bitfield extract
3503 if (tryV6T2BitfieldExtractOp(N, false))
3504 return;
3505
3506 // If an immediate is used in an AND node, it is possible that the immediate
3507 // can be more optimally materialized when negated. If this is the case we
3508 // can negate the immediate and use a BIC instead.
3509 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
3510 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
3511 uint32_t Imm = (uint32_t) N1C->getZExtValue();
3512
3513 // In Thumb2 mode, an AND can take a 12-bit immediate. If this
3514 // immediate can be negated and fit in the immediate operand of
3515 // a t2BIC, don't do any manual transform here as this can be
3516 // handled by the generic ISel machinery.
3517 bool PreferImmediateEncoding =
3518 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
3519 if (!PreferImmediateEncoding &&
3520 ConstantMaterializationCost(Imm, Subtarget) >
3521 ConstantMaterializationCost(~Imm, Subtarget)) {
3522 // The current immediate costs more to materialize than a negated
3523 // immediate, so negate the immediate and use a BIC.
3524 SDValue NewImm =
3525 CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
3526 // If the new constant didn't exist before, reposition it in the topological
3527 // ordering so it is just before N. Otherwise, don't touch its location.
3528 if (NewImm->getNodeId() == -1)
3529 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
3530
3531 if (!Subtarget->hasThumb2()) {
3532 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
3533 N->getOperand(0), NewImm, getAL(CurDAG, dl),
3534 CurDAG->getRegister(0, MVT::i32)};
3535 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
3536 return;
3537 } else {
3538 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
3539 CurDAG->getRegister(0, MVT::i32),
3540 CurDAG->getRegister(0, MVT::i32)};
3541 ReplaceNode(N,
3542 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
3543 return;
3544 }
3545 }
3546 }
3547
3548 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
3549 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
3550 // are entirely contributed by c2 and lower 16-bits are entirely contributed
3551 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
3552 // Select it to: "movt x, ((c1 & 0xffff) >> 16)
3553 EVT VT = N->getValueType(0);
3554 if (VT != MVT::i32)
3555 break;
3556 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
3557 ? ARM::t2MOVTi16
3558 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
3559 if (!Opc)
3560 break;
3561 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3562 N1C = dyn_cast<ConstantSDNode>(N1);
3563 if (!N1C)
3564 break;
3565 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
3566 SDValue N2 = N0.getOperand(1);
3567 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
3568 if (!N2C)
3569 break;
3570 unsigned N1CVal = N1C->getZExtValue();
3571 unsigned N2CVal = N2C->getZExtValue();
3572 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
3573 (N1CVal & 0xffffU) == 0xffffU &&
3574 (N2CVal & 0xffffU) == 0x0U) {
3575 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
3576 dl, MVT::i32);
3577 SDValue Ops[] = { N0.getOperand(0), Imm16,
3578 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
3579 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
3580 return;
3581 }
3582 }
3583
3584 break;
3585 }
3586 case ARMISD::UMAAL: {
3587 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
3588 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
3589 N->getOperand(2), N->getOperand(3),
3590 getAL(CurDAG, dl),
3591 CurDAG->getRegister(0, MVT::i32) };
3592 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
3593 return;
3594 }
3595 case ARMISD::UMLAL:{
3596 if (Subtarget->isThumb()) {
3597 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3598 N->getOperand(3), getAL(CurDAG, dl),
3599 CurDAG->getRegister(0, MVT::i32)};
3600 ReplaceNode(
3601 N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
3602 return;
3603 }else{
3604 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3605 N->getOperand(3), getAL(CurDAG, dl),
3606 CurDAG->getRegister(0, MVT::i32),
3607 CurDAG->getRegister(0, MVT::i32) };
3608 ReplaceNode(N, CurDAG->getMachineNode(
3609 Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
3610 MVT::i32, MVT::i32, Ops));
3611 return;
3612 }
3613 }
3614 case ARMISD::SMLAL:{
3615 if (Subtarget->isThumb()) {
3616 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3617 N->getOperand(3), getAL(CurDAG, dl),
3618 CurDAG->getRegister(0, MVT::i32)};
3619 ReplaceNode(
3620 N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
3621 return;
3622 }else{
3623 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3624 N->getOperand(3), getAL(CurDAG, dl),
3625 CurDAG->getRegister(0, MVT::i32),
3626 CurDAG->getRegister(0, MVT::i32) };
3627 ReplaceNode(N, CurDAG->getMachineNode(
3628 Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
3629 MVT::i32, MVT::i32, Ops));
3630 return;
3631 }
3632 }
3633 case ARMISD::SUBE: {
3634 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
3635 break;
3636 // Look for a pattern to match SMMLS
3637 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
3638 if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
3639 N->getOperand(2).getOpcode() != ARMISD::SUBC ||
3640 !SDValue(N, 1).use_empty())
3641 break;
3642
3643 if (Subtarget->isThumb())
3644 assert(Subtarget->hasThumb2() &&
3645 "This pattern should not be generated for Thumb");
3646
3647 SDValue SmulLoHi = N->getOperand(1);
3648 SDValue Subc = N->getOperand(2);
3649 auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));
3650
3651 if (!Zero || Zero->getZExtValue() != 0 ||
3652 Subc.getOperand(1) != SmulLoHi.getValue(0) ||
3653 N->getOperand(1) != SmulLoHi.getValue(1) ||
3654 N->getOperand(2) != Subc.getValue(1))
3655 break;
3656
3657 unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
3658 SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
3659 N->getOperand(0), getAL(CurDAG, dl),
3660 CurDAG->getRegister(0, MVT::i32) };
3661 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
3662 return;
3663 }
3664 case ISD::LOAD: {
3665 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
3666 return;
3667 if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
3668 if (tryT2IndexedLoad(N))
3669 return;
3670 } else if (Subtarget->isThumb()) {
3671 if (tryT1IndexedLoad(N))
3672 return;
3673 } else if (tryARMIndexedLoad(N))
3674 return;
3675 // Other cases are autogenerated.
3676 break;
3677 }
3678 case ISD::MLOAD:
3679 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
3680 return;
3681 // Other cases are autogenerated.
3682 break;
3683 case ARMISD::WLS:
3684 case ARMISD::LE: {
3685 SDValue Ops[] = { N->getOperand(1),
3686 N->getOperand(2),
3687 N->getOperand(0) };
3688 unsigned Opc = N->getOpcode() == ARMISD::WLS ?
3689 ARM::t2WhileLoopStart : ARM::t2LoopEnd;
3690 SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
3691 ReplaceUses(N, New);
3692 CurDAG->RemoveDeadNode(N);
3693 return;
3694 }
3695 case ARMISD::LDRD: {
3696 if (Subtarget->isThumb2())
3697 break; // TableGen handles isel in this case.
3698 SDValue Base, RegOffset, ImmOffset;
3699 const SDValue &Chain = N->getOperand(0);
3700 const SDValue &Addr = N->getOperand(1);
3701 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
3702 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
3703 // The register-offset variant of LDRD mandates that the register
3704 // allocated to RegOffset is not reused in any of the remaining operands.
3705 // This restriction is currently not enforced. Therefore emitting this
3706 // variant is explicitly avoided.
3707 Base = Addr;
3708 RegOffset = CurDAG->getRegister(0, MVT::i32);
3709 }
3710 SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain};
3711 SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl,
3712 {MVT::Untyped, MVT::Other}, Ops);
3713 SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
3714 SDValue(New, 0));
3715 SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
3716 SDValue(New, 0));
3717 transferMemOperands(N, New);
3718 ReplaceUses(SDValue(N, 0), Lo);
3719 ReplaceUses(SDValue(N, 1), Hi);
3720 ReplaceUses(SDValue(N, 2), SDValue(New, 1));
3721 CurDAG->RemoveDeadNode(N);
3722 return;
3723 }
3724 case ARMISD::STRD: {
3725 if (Subtarget->isThumb2())
3726 break; // TableGen handles isel in this case.
3727 SDValue Base, RegOffset, ImmOffset;
3728 const SDValue &Chain = N->getOperand(0);
3729 const SDValue &Addr = N->getOperand(3);
3730 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
3731 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
3732 // The register-offset variant of STRD mandates that the register
3733 // allocated to RegOffset is not reused in any of the remaining operands.
3734 // This restriction is currently not enforced. Therefore emitting this
3735 // variant is explicitly avoided.
3736 Base = Addr;
3737 RegOffset = CurDAG->getRegister(0, MVT::i32);
3738 }
3739 SDNode *RegPair =
3740 createGPRPairNode(MVT::Untyped, N->getOperand(1), N->getOperand(2));
3741 SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain};
3742 SDNode *New = CurDAG->getMachineNode(ARM::STOREDUAL, dl, MVT::Other, Ops);
3743 transferMemOperands(N, New);
3744 ReplaceUses(SDValue(N, 0), SDValue(New, 0));
3745 CurDAG->RemoveDeadNode(N);
3746 return;
3747 }
3748 case ARMISD::LOOP_DEC: {
3749 SDValue Ops[] = { N->getOperand(1),
3750 N->getOperand(2),
3751 N->getOperand(0) };
3752 SDNode *Dec =
3753 CurDAG->getMachineNode(ARM::t2LoopDec, dl,
3754 CurDAG->getVTList(MVT::i32, MVT::Other), Ops);
3755 ReplaceUses(N, Dec);
3756 CurDAG->RemoveDeadNode(N);
3757 return;
3758 }
3759 case ARMISD::BRCOND: {
3760 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3761 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3762 // Pattern complexity = 6 cost = 1 size = 0
3763
3764 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3765 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
3766 // Pattern complexity = 6 cost = 1 size = 0
3767
3768 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3769 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3770 // Pattern complexity = 6 cost = 1 size = 0
3771
3772 unsigned Opc = Subtarget->isThumb() ?
3773 ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
3774 SDValue Chain = N->getOperand(0);
3775 SDValue N1 = N->getOperand(1);
3776 SDValue N2 = N->getOperand(2);
3777 SDValue N3 = N->getOperand(3);
3778 SDValue InFlag = N->getOperand(4);
3779 assert(N1.getOpcode() == ISD::BasicBlock);
3780 assert(N2.getOpcode() == ISD::Constant);
3781 assert(N3.getOpcode() == ISD::Register);
3782
3783 unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
3784
3785 if (InFlag.getOpcode() == ARMISD::CMPZ) {
3786 if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
3787 SDValue Int = InFlag.getOperand(0);
3788 uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();
3789
3790 // Handle low-overhead loops.
3791 if (ID == Intrinsic::loop_decrement_reg) {
3792 SDValue Elements = Int.getOperand(2);
3793 SDValue Size = CurDAG->getTargetConstant(
3794 cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl,
3795 MVT::i32);
3796
3797 SDValue Args[] = { Elements, Size, Int.getOperand(0) };
3798 SDNode *LoopDec =
3799 CurDAG->getMachineNode(ARM::t2LoopDec, dl,
3800 CurDAG->getVTList(MVT::i32, MVT::Other),
3801 Args);
3802 ReplaceUses(Int.getNode(), LoopDec);
3803
3804 SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
3805 SDNode *LoopEnd =
3806 CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);
3807
3808 ReplaceUses(N, LoopEnd);
3809 CurDAG->RemoveDeadNode(N);
3810 CurDAG->RemoveDeadNode(InFlag.getNode());
3811 CurDAG->RemoveDeadNode(Int.getNode());
3812 return;
3813 }
3814 }
3815
3816 bool SwitchEQNEToPLMI;
3817 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3818 InFlag = N->getOperand(4);
3819
3820 if (SwitchEQNEToPLMI) {
3821 switch ((ARMCC::CondCodes)CC) {
3822 default: llvm_unreachable("CMPZ must be either NE or EQ!");
3823 case ARMCC::NE:
3824 CC = (unsigned)ARMCC::MI;
3825 break;
3826 case ARMCC::EQ:
3827 CC = (unsigned)ARMCC::PL;
3828 break;
3829 }
3830 }
3831 }
3832
3833 SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
3834 SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
3835 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
3836 MVT::Glue, Ops);
3837 Chain = SDValue(ResNode, 0);
3838 if (N->getNumValues() == 2) {
3839 InFlag = SDValue(ResNode, 1);
3840 ReplaceUses(SDValue(N, 1), InFlag);
3841 }
3842 ReplaceUses(SDValue(N, 0),
3843 SDValue(Chain.getNode(), Chain.getResNo()));
3844 CurDAG->RemoveDeadNode(N);
3845 return;
3846 }
3847
3848 case ARMISD::CMPZ: {
3849 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
3850 // This allows us to avoid materializing the expensive negative constant.
3851 // The CMPZ #0 is useless and will be peepholed away but we need to keep it
3852 // for its glue output.
3853 SDValue X = N->getOperand(0);
3854 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
3855 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
3856 int64_t Addend = -C->getSExtValue();
3857
3858 SDNode *Add = nullptr;
3859 // ADDS can be better than CMN if the immediate fits in a
3860 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
3861 // Outside that range we can just use a CMN which is 32-bit but has a
3862 // 12-bit immediate range.
3863 if (Addend < 1<<8) {
3864 if (Subtarget->isThumb2()) {
3865 SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3866 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3867 CurDAG->getRegister(0, MVT::i32) };
3868 Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
3869 } else {
3870 unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
3871 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
3872 CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3873 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
3874 Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3875 }
3876 }
3877 if (Add) {
3878 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
3879 CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
3880 }
3881 }
3882 // Other cases are autogenerated.
3883 break;
3884 }
3885
3886 case ARMISD::CMOV: {
3887 SDValue InFlag = N->getOperand(4);
3888
3889 if (InFlag.getOpcode() == ARMISD::CMPZ) {
3890 bool SwitchEQNEToPLMI;
3891 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3892
3893 if (SwitchEQNEToPLMI) {
3894 SDValue ARMcc = N->getOperand(2);
3895 ARMCC::CondCodes CC =
3896 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
3897
3898 switch (CC) {
3899 default: llvm_unreachable("CMPZ must be either NE or EQ!");
3900 case ARMCC::NE:
3901 CC = ARMCC::MI;
3902 break;
3903 case ARMCC::EQ:
3904 CC = ARMCC::PL;
3905 break;
3906 }
3907 SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
3908 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
3909 N->getOperand(3), N->getOperand(4)};
3910 CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
3911 }
3912
3913 }
3914 // Other cases are autogenerated.
3915 break;
3916 }
3917
3918 case ARMISD::VZIP: {
3919 unsigned Opc = 0;
3920 EVT VT = N->getValueType(0);
3921 switch (VT.getSimpleVT().SimpleTy) {
3922 default: return;
3923 case MVT::v8i8: Opc = ARM::VZIPd8; break;
3924 case MVT::v4f16:
3925 case MVT::v4i16: Opc = ARM::VZIPd16; break;
3926 case MVT::v2f32:
3927 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3928 case MVT::v2i32: Opc = ARM::VTRNd32; break;
3929 case MVT::v16i8: Opc = ARM::VZIPq8; break;
3930 case MVT::v8f16:
3931 case MVT::v8i16: Opc = ARM::VZIPq16; break;
3932 case MVT::v4f32:
3933 case MVT::v4i32: Opc = ARM::VZIPq32; break;
3934 }
3935 SDValue Pred = getAL(CurDAG, dl);
3936 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3937 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3938 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3939 return;
3940 }
3941 case ARMISD::VUZP: {
3942 unsigned Opc = 0;
3943 EVT VT = N->getValueType(0);
3944 switch (VT.getSimpleVT().SimpleTy) {
3945 default: return;
3946 case MVT::v8i8: Opc = ARM::VUZPd8; break;
3947 case MVT::v4f16:
3948 case MVT::v4i16: Opc = ARM::VUZPd16; break;
3949 case MVT::v2f32:
3950 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3951 case MVT::v2i32: Opc = ARM::VTRNd32; break;
3952 case MVT::v16i8: Opc = ARM::VUZPq8; break;
3953 case MVT::v8f16:
3954 case MVT::v8i16: Opc = ARM::VUZPq16; break;
3955 case MVT::v4f32:
3956 case MVT::v4i32: Opc = ARM::VUZPq32; break;
3957 }
3958 SDValue Pred = getAL(CurDAG, dl);
3959 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3960 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3961 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3962 return;
3963 }
3964 case ARMISD::VTRN: {
3965 unsigned Opc = 0;
3966 EVT VT = N->getValueType(0);
3967 switch (VT.getSimpleVT().SimpleTy) {
3968 default: return;
3969 case MVT::v8i8: Opc = ARM::VTRNd8; break;
3970 case MVT::v4f16:
3971 case MVT::v4i16: Opc = ARM::VTRNd16; break;
3972 case MVT::v2f32:
3973 case MVT::v2i32: Opc = ARM::VTRNd32; break;
3974 case MVT::v16i8: Opc = ARM::VTRNq8; break;
3975 case MVT::v8f16:
3976 case MVT::v8i16: Opc = ARM::VTRNq16; break;
3977 case MVT::v4f32:
3978 case MVT::v4i32: Opc = ARM::VTRNq32; break;
3979 }
3980 SDValue Pred = getAL(CurDAG, dl);
3981 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3982 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3983 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3984 return;
3985 }
3986 case ARMISD::BUILD_VECTOR: {
3987 EVT VecVT = N->getValueType(0);
3988 EVT EltVT = VecVT.getVectorElementType();
3989 unsigned NumElts = VecVT.getVectorNumElements();
3990 if (EltVT == MVT::f64) {
3991 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
3992 ReplaceNode(
3993 N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3994 return;
3995 }
3996 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
3997 if (NumElts == 2) {
3998 ReplaceNode(
3999 N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
4000 return;
4001 }
4002 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
4003 ReplaceNode(N,
4004 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
4005 N->getOperand(2), N->getOperand(3)));
4006 return;
4007 }
4008
4009 case ARMISD::VLD1DUP: {
4010 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
4011 ARM::VLD1DUPd32 };
4012 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
4013 ARM::VLD1DUPq32 };
4014 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
4015 return;
4016 }
4017
4018 case ARMISD::VLD2DUP: {
4019 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
4020 ARM::VLD2DUPd32 };
4021 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
4022 return;
4023 }
4024
4025 case ARMISD::VLD3DUP: {
4026 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
4027 ARM::VLD3DUPd16Pseudo,
4028 ARM::VLD3DUPd32Pseudo };
4029 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
4030 return;
4031 }
4032
4033 case ARMISD::VLD4DUP: {
4034 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
4035 ARM::VLD4DUPd16Pseudo,
4036 ARM::VLD4DUPd32Pseudo };
4037 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
4038 return;
4039 }
4040
4041 case ARMISD::VLD1DUP_UPD: {
4042 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
4043 ARM::VLD1DUPd16wb_fixed,
4044 ARM::VLD1DUPd32wb_fixed };
4045 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
4046 ARM::VLD1DUPq16wb_fixed,
4047 ARM::VLD1DUPq32wb_fixed };
4048 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
4049 return;
4050 }
4051
4052 case ARMISD::VLD2DUP_UPD: {
4053 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
4054 ARM::VLD2DUPd16wb_fixed,
4055 ARM::VLD2DUPd32wb_fixed };
4056 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, Opcodes);
4057 return;
4058 }
4059
4060 case ARMISD::VLD3DUP_UPD: {
4061 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
4062 ARM::VLD3DUPd16Pseudo_UPD,
4063 ARM::VLD3DUPd32Pseudo_UPD };
4064 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, Opcodes);
4065 return;
4066 }
4067
4068 case ARMISD::VLD4DUP_UPD: {
4069 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
4070 ARM::VLD4DUPd16Pseudo_UPD,
4071 ARM::VLD4DUPd32Pseudo_UPD };
4072 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, Opcodes);
4073 return;
4074 }
4075
4076 case ARMISD::VLD1_UPD: {
4077 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
4078 ARM::VLD1d16wb_fixed,
4079 ARM::VLD1d32wb_fixed,
4080 ARM::VLD1d64wb_fixed };
4081 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
4082 ARM::VLD1q16wb_fixed,
4083 ARM::VLD1q32wb_fixed,
4084 ARM::VLD1q64wb_fixed };
4085 SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
4086 return;
4087 }
4088
4089 case ARMISD::VLD2_UPD: {
4090 if (Subtarget->hasNEON()) {
4091 static const uint16_t DOpcodes[] = {
4092 ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed,
4093 ARM::VLD1q64wb_fixed};
4094 static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed,
4095 ARM::VLD2q16PseudoWB_fixed,
4096 ARM::VLD2q32PseudoWB_fixed};
4097 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
4098 } else {
4099 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8,
4100 ARM::MVE_VLD21_8_wb};
4101 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
4102 ARM::MVE_VLD21_16_wb};
4103 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
4104 ARM::MVE_VLD21_32_wb};
4105 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
4106 SelectMVE_VLD(N, 2, Opcodes, true);
4107 }
4108 return;
4109 }
4110
4111 case ARMISD::VLD3_UPD: {
4112 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
4113 ARM::VLD3d16Pseudo_UPD,
4114 ARM::VLD3d32Pseudo_UPD,
4115 ARM::VLD1d64TPseudoWB_fixed};
4116 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
4117 ARM::VLD3q16Pseudo_UPD,
4118 ARM::VLD3q32Pseudo_UPD };
4119 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
4120 ARM::VLD3q16oddPseudo_UPD,
4121 ARM::VLD3q32oddPseudo_UPD };
4122 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4123 return;
4124 }
4125
4126 case ARMISD::VLD4_UPD: {
4127 if (Subtarget->hasNEON()) {
4128 static const uint16_t DOpcodes[] = {
4129 ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD,
4130 ARM::VLD1d64QPseudoWB_fixed};
4131 static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD,
4132 ARM::VLD4q16Pseudo_UPD,
4133 ARM::VLD4q32Pseudo_UPD};
4134 static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD,
4135 ARM::VLD4q16oddPseudo_UPD,
4136 ARM::VLD4q32oddPseudo_UPD};
4137 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4138 } else {
4139 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
4140 ARM::MVE_VLD42_8,
4141 ARM::MVE_VLD43_8_wb};
4142 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
4143 ARM::MVE_VLD42_16,
4144 ARM::MVE_VLD43_16_wb};
4145 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
4146 ARM::MVE_VLD42_32,
4147 ARM::MVE_VLD43_32_wb};
4148 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
4149 SelectMVE_VLD(N, 4, Opcodes, true);
4150 }
4151 return;
4152 }
4153
4154 case ARMISD::VLD2LN_UPD: {
4155 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
4156 ARM::VLD2LNd16Pseudo_UPD,
4157 ARM::VLD2LNd32Pseudo_UPD };
4158 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
4159 ARM::VLD2LNq32Pseudo_UPD };
4160 SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
4161 return;
4162 }
4163
4164 case ARMISD::VLD3LN_UPD: {
4165 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
4166 ARM::VLD3LNd16Pseudo_UPD,
4167 ARM::VLD3LNd32Pseudo_UPD };
4168 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
4169 ARM::VLD3LNq32Pseudo_UPD };
4170 SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
4171 return;
4172 }
4173
4174 case ARMISD::VLD4LN_UPD: {
4175 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
4176 ARM::VLD4LNd16Pseudo_UPD,
4177 ARM::VLD4LNd32Pseudo_UPD };
4178 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
4179 ARM::VLD4LNq32Pseudo_UPD };
4180 SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
4181 return;
4182 }
4183
4184 case ARMISD::VST1_UPD: {
4185 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
4186 ARM::VST1d16wb_fixed,
4187 ARM::VST1d32wb_fixed,
4188 ARM::VST1d64wb_fixed };
4189 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
4190 ARM::VST1q16wb_fixed,
4191 ARM::VST1q32wb_fixed,
4192 ARM::VST1q64wb_fixed };
4193 SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
4194 return;
4195 }
4196
4197 case ARMISD::VST2_UPD: {
4198 if (Subtarget->hasNEON()) {
4199 static const uint16_t DOpcodes[] = {
4200 ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed,
4201 ARM::VST1q64wb_fixed};
4202 static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed,
4203 ARM::VST2q16PseudoWB_fixed,
4204 ARM::VST2q32PseudoWB_fixed};
4205 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
4206 return;
4207 }
4208 break;
4209 }
4210
4211 case ARMISD::VST3_UPD: {
4212 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
4213 ARM::VST3d16Pseudo_UPD,
4214 ARM::VST3d32Pseudo_UPD,
4215 ARM::VST1d64TPseudoWB_fixed};
4216 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
4217 ARM::VST3q16Pseudo_UPD,
4218 ARM::VST3q32Pseudo_UPD };
4219 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
4220 ARM::VST3q16oddPseudo_UPD,
4221 ARM::VST3q32oddPseudo_UPD };
4222 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4223 return;
4224 }
4225
4226 case ARMISD::VST4_UPD: {
4227 if (Subtarget->hasNEON()) {
4228 static const uint16_t DOpcodes[] = {
4229 ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD,
4230 ARM::VST1d64QPseudoWB_fixed};
4231 static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD,
4232 ARM::VST4q16Pseudo_UPD,
4233 ARM::VST4q32Pseudo_UPD};
4234 static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD,
4235 ARM::VST4q16oddPseudo_UPD,
4236 ARM::VST4q32oddPseudo_UPD};
4237 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4238 return;
4239 }
4240 break;
4241 }
4242
4243 case ARMISD::VST2LN_UPD: {
4244 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
4245 ARM::VST2LNd16Pseudo_UPD,
4246 ARM::VST2LNd32Pseudo_UPD };
4247 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
4248 ARM::VST2LNq32Pseudo_UPD };
4249 SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
4250 return;
4251 }
4252
4253 case ARMISD::VST3LN_UPD: {
4254 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
4255 ARM::VST3LNd16Pseudo_UPD,
4256 ARM::VST3LNd32Pseudo_UPD };
4257 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
4258 ARM::VST3LNq32Pseudo_UPD };
4259 SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
4260 return;
4261 }
4262
4263 case ARMISD::VST4LN_UPD: {
4264 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
4265 ARM::VST4LNd16Pseudo_UPD,
4266 ARM::VST4LNd32Pseudo_UPD };
4267 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
4268 ARM::VST4LNq32Pseudo_UPD };
4269 SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
4270 return;
4271 }
4272
4273 case ISD::INTRINSIC_VOID:
4274 case ISD::INTRINSIC_W_CHAIN: {
4275 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
4276 switch (IntNo) {
4277 default:
4278 break;
4279
4280 case Intrinsic::arm_mrrc:
4281 case Intrinsic::arm_mrrc2: {
4282 SDLoc dl(N);
4283 SDValue Chain = N->getOperand(0);
4284 unsigned Opc;
4285
4286 if (Subtarget->isThumb())
4287 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
4288 else
4289 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
4290
4291 SmallVector<SDValue, 5> Ops;
4292 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
4293 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
4294 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */
4295
4296 // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded
4297 // instruction will always be '1111' but it is possible in assembly language to specify
4298 // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction.
4299 if (Opc != ARM::MRRC2) {
4300 Ops.push_back(getAL(CurDAG, dl));
4301 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4302 }
4303
4304 Ops.push_back(Chain);
4305
4306 // Writes to two registers.
4307 const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
4308
4309 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
4310 return;
4311 }
4312 case Intrinsic::arm_ldaexd:
4313 case Intrinsic::arm_ldrexd: {
4314 SDLoc dl(N);
4315 SDValue Chain = N->getOperand(0);
4316 SDValue MemAddr = N->getOperand(2);
4317 bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
4318
4319 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
4320 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
4321 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
4322
4323 // arm_ldrexd returns a i64 value in {i32, i32}
4324 std::vector<EVT> ResTys;
4325 if (isThumb) {
4326 ResTys.push_back(MVT::i32);
4327 ResTys.push_back(MVT::i32);
4328 } else
4329 ResTys.push_back(MVT::Untyped);
4330 ResTys.push_back(MVT::Other);
4331
4332 // Place arguments in the right order.
4333 SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
4334 CurDAG->getRegister(0, MVT::i32), Chain};
4335 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
4336 // Transfer memoperands.
4337 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
4338 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4339
4340 // Remap uses.
4341 SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
4342 if (!SDValue(N, 0).use_empty()) {
4343 SDValue Result;
4344 if (isThumb)
4345 Result = SDValue(Ld, 0);
4346 else {
4347 SDValue SubRegIdx =
4348 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
4349 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
4350 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
4351 Result = SDValue(ResNode,0);
4352 }
4353 ReplaceUses(SDValue(N, 0), Result);
4354 }
4355 if (!SDValue(N, 1).use_empty()) {
4356 SDValue Result;
4357 if (isThumb)
4358 Result = SDValue(Ld, 1);
4359 else {
4360 SDValue SubRegIdx =
4361 CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
4362 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
4363 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
4364 Result = SDValue(ResNode,0);
4365 }
4366 ReplaceUses(SDValue(N, 1), Result);
4367 }
4368 ReplaceUses(SDValue(N, 2), OutChain);
4369 CurDAG->RemoveDeadNode(N);
4370 return;
4371 }
4372 case Intrinsic::arm_stlexd:
4373 case Intrinsic::arm_strexd: {
4374 SDLoc dl(N);
4375 SDValue Chain = N->getOperand(0);
4376 SDValue Val0 = N->getOperand(2);
4377 SDValue Val1 = N->getOperand(3);
4378 SDValue MemAddr = N->getOperand(4);
4379
4380 // Store exclusive double return a i32 value which is the return status
4381 // of the issued store.
4382 const EVT ResTys[] = {MVT::i32, MVT::Other};
4383
4384 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
4385 // Place arguments in the right order.
4386 SmallVector<SDValue, 7> Ops;
4387 if (isThumb) {
4388 Ops.push_back(Val0);
4389 Ops.push_back(Val1);
4390 } else
4391 // arm_strexd uses GPRPair.
4392 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
4393 Ops.push_back(MemAddr);
4394 Ops.push_back(getAL(CurDAG, dl));
4395 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4396 Ops.push_back(Chain);
4397
4398 bool IsRelease = IntNo == Intrinsic::arm_stlexd;
4399 unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
4400 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
4401
4402 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
4403 // Transfer memoperands.
4404 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
4405 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4406
4407 ReplaceNode(N, St);
4408 return;
4409 }
4410
4411 case Intrinsic::arm_neon_vld1: {
4412 static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
4413 ARM::VLD1d32, ARM::VLD1d64 };
4414 static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
4415 ARM::VLD1q32, ARM::VLD1q64};
4416 SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
4417 return;
4418 }
4419
4420 case Intrinsic::arm_neon_vld1x2: {
4421 static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
4422 ARM::VLD1q32, ARM::VLD1q64 };
4423 static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
4424 ARM::VLD1d16QPseudo,
4425 ARM::VLD1d32QPseudo,
4426 ARM::VLD1d64QPseudo };
4427 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
4428 return;
4429 }
4430
4431 case Intrinsic::arm_neon_vld1x3: {
4432 static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
4433 ARM::VLD1d16TPseudo,
4434 ARM::VLD1d32TPseudo,
4435 ARM::VLD1d64TPseudo };
4436 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
4437 ARM::VLD1q16LowTPseudo_UPD,
4438 ARM::VLD1q32LowTPseudo_UPD,
4439 ARM::VLD1q64LowTPseudo_UPD };
4440 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
4441 ARM::VLD1q16HighTPseudo,
4442 ARM::VLD1q32HighTPseudo,
4443 ARM::VLD1q64HighTPseudo };
4444 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
4445 return;
4446 }
4447
4448 case Intrinsic::arm_neon_vld1x4: {
4449 static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
4450 ARM::VLD1d16QPseudo,
4451 ARM::VLD1d32QPseudo,
4452 ARM::VLD1d64QPseudo };
4453 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
4454 ARM::VLD1q16LowQPseudo_UPD,
4455 ARM::VLD1q32LowQPseudo_UPD,
4456 ARM::VLD1q64LowQPseudo_UPD };
4457 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
4458 ARM::VLD1q16HighQPseudo,
4459 ARM::VLD1q32HighQPseudo,
4460 ARM::VLD1q64HighQPseudo };
4461 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
4462 return;
4463 }
4464
4465 case Intrinsic::arm_neon_vld2: {
4466 static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
4467 ARM::VLD2d32, ARM::VLD1q64 };
4468 static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
4469 ARM::VLD2q32Pseudo };
4470 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
4471 return;
4472 }
4473
4474 case Intrinsic::arm_neon_vld3: {
4475 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
4476 ARM::VLD3d16Pseudo,
4477 ARM::VLD3d32Pseudo,
4478 ARM::VLD1d64TPseudo };
4479 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
4480 ARM::VLD3q16Pseudo_UPD,
4481 ARM::VLD3q32Pseudo_UPD };
4482 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
4483 ARM::VLD3q16oddPseudo,
4484 ARM::VLD3q32oddPseudo };
4485 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
4486 return;
4487 }
4488
4489 case Intrinsic::arm_neon_vld4: {
4490 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
4491 ARM::VLD4d16Pseudo,
4492 ARM::VLD4d32Pseudo,
4493 ARM::VLD1d64QPseudo };
4494 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
4495 ARM::VLD4q16Pseudo_UPD,
4496 ARM::VLD4q32Pseudo_UPD };
4497 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
4498 ARM::VLD4q16oddPseudo,
4499 ARM::VLD4q32oddPseudo };
4500 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
4501 return;
4502 }
4503
4504 case Intrinsic::arm_neon_vld2dup: {
4505 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
4506 ARM::VLD2DUPd32, ARM::VLD1q64 };
4507 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
4508 ARM::VLD2DUPq16EvenPseudo,
4509 ARM::VLD2DUPq32EvenPseudo };
4510 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
4511 ARM::VLD2DUPq16OddPseudo,
4512 ARM::VLD2DUPq32OddPseudo };
4513 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
4514 DOpcodes, QOpcodes0, QOpcodes1);
4515 return;
4516 }
4517
4518 case Intrinsic::arm_neon_vld3dup: {
4519 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
4520 ARM::VLD3DUPd16Pseudo,
4521 ARM::VLD3DUPd32Pseudo,
4522 ARM::VLD1d64TPseudo };
4523 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
4524 ARM::VLD3DUPq16EvenPseudo,
4525 ARM::VLD3DUPq32EvenPseudo };
4526 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
4527 ARM::VLD3DUPq16OddPseudo,
4528 ARM::VLD3DUPq32OddPseudo };
4529 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
4530 DOpcodes, QOpcodes0, QOpcodes1);
4531 return;
4532 }
4533
4534 case Intrinsic::arm_neon_vld4dup: {
4535 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
4536 ARM::VLD4DUPd16Pseudo,
4537 ARM::VLD4DUPd32Pseudo,
4538 ARM::VLD1d64QPseudo };
4539 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
4540 ARM::VLD4DUPq16EvenPseudo,
4541 ARM::VLD4DUPq32EvenPseudo };
4542 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
4543 ARM::VLD4DUPq16OddPseudo,
4544 ARM::VLD4DUPq32OddPseudo };
4545 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
4546 DOpcodes, QOpcodes0, QOpcodes1);
4547 return;
4548 }
4549
4550 case Intrinsic::arm_neon_vld2lane: {
4551 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
4552 ARM::VLD2LNd16Pseudo,
4553 ARM::VLD2LNd32Pseudo };
4554 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
4555 ARM::VLD2LNq32Pseudo };
4556 SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
4557 return;
4558 }
4559
4560 case Intrinsic::arm_neon_vld3lane: {
4561 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
4562 ARM::VLD3LNd16Pseudo,
4563 ARM::VLD3LNd32Pseudo };
4564 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
4565 ARM::VLD3LNq32Pseudo };
4566 SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
4567 return;
4568 }
4569
4570 case Intrinsic::arm_neon_vld4lane: {
4571 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
4572 ARM::VLD4LNd16Pseudo,
4573 ARM::VLD4LNd32Pseudo };
4574 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
4575 ARM::VLD4LNq32Pseudo };
4576 SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
4577 return;
4578 }
4579
4580 case Intrinsic::arm_neon_vst1: {
4581 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
4582 ARM::VST1d32, ARM::VST1d64 };
4583 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
4584 ARM::VST1q32, ARM::VST1q64 };
4585 SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
4586 return;
4587 }
4588
4589 case Intrinsic::arm_neon_vst1x2: {
4590 static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
4591 ARM::VST1q32, ARM::VST1q64 };
4592 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
4593 ARM::VST1d16QPseudo,
4594 ARM::VST1d32QPseudo,
4595 ARM::VST1d64QPseudo };
4596 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
4597 return;
4598 }
4599
4600 case Intrinsic::arm_neon_vst1x3: {
4601 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
4602 ARM::VST1d16TPseudo,
4603 ARM::VST1d32TPseudo,
4604 ARM::VST1d64TPseudo };
4605 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
4606 ARM::VST1q16LowTPseudo_UPD,
4607 ARM::VST1q32LowTPseudo_UPD,
4608 ARM::VST1q64LowTPseudo_UPD };
4609 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
4610 ARM::VST1q16HighTPseudo,
4611 ARM::VST1q32HighTPseudo,
4612 ARM::VST1q64HighTPseudo };
4613 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
4614 return;
4615 }
4616
4617 case Intrinsic::arm_neon_vst1x4: {
4618 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
4619 ARM::VST1d16QPseudo,
4620 ARM::VST1d32QPseudo,
4621 ARM::VST1d64QPseudo };
4622 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
4623 ARM::VST1q16LowQPseudo_UPD,
4624 ARM::VST1q32LowQPseudo_UPD,
4625 ARM::VST1q64LowQPseudo_UPD };
4626 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
4627 ARM::VST1q16HighQPseudo,
4628 ARM::VST1q32HighQPseudo,
4629 ARM::VST1q64HighQPseudo };
4630 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
4631 return;
4632 }
4633
4634 case Intrinsic::arm_neon_vst2: {
4635 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
4636 ARM::VST2d32, ARM::VST1q64 };
4637 static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
4638 ARM::VST2q32Pseudo };
4639 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
4640 return;
4641 }
4642
4643 case Intrinsic::arm_neon_vst3: {
4644 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
4645 ARM::VST3d16Pseudo,
4646 ARM::VST3d32Pseudo,
4647 ARM::VST1d64TPseudo };
4648 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
4649 ARM::VST3q16Pseudo_UPD,
4650 ARM::VST3q32Pseudo_UPD };
4651 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
4652 ARM::VST3q16oddPseudo,
4653 ARM::VST3q32oddPseudo };
4654 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
4655 return;
4656 }
4657
4658 case Intrinsic::arm_neon_vst4: {
4659 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
4660 ARM::VST4d16Pseudo,
4661 ARM::VST4d32Pseudo,
4662 ARM::VST1d64QPseudo };
4663 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
4664 ARM::VST4q16Pseudo_UPD,
4665 ARM::VST4q32Pseudo_UPD };
4666 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
4667 ARM::VST4q16oddPseudo,
4668 ARM::VST4q32oddPseudo };
4669 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
4670 return;
4671 }
4672
4673 case Intrinsic::arm_neon_vst2lane: {
4674 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
4675 ARM::VST2LNd16Pseudo,
4676 ARM::VST2LNd32Pseudo };
4677 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
4678 ARM::VST2LNq32Pseudo };
4679 SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
4680 return;
4681 }
4682
4683 case Intrinsic::arm_neon_vst3lane: {
4684 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
4685 ARM::VST3LNd16Pseudo,
4686 ARM::VST3LNd32Pseudo };
4687 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
4688 ARM::VST3LNq32Pseudo };
4689 SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
4690 return;
4691 }
4692
4693 case Intrinsic::arm_neon_vst4lane: {
4694 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
4695 ARM::VST4LNd16Pseudo,
4696 ARM::VST4LNd32Pseudo };
4697 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
4698 ARM::VST4LNq32Pseudo };
4699 SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
4700 return;
4701 }
4702
4703 case Intrinsic::arm_mve_vldr_gather_base_wb:
4704 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
4705 static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre,
4706 ARM::MVE_VLDRDU64_qi_pre};
4707 SelectMVE_WB(N, Opcodes,
4708 IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated);
4709 return;
4710 }
4711
4712 case Intrinsic::arm_mve_vld2q: {
4713 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8};
4714 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
4715 ARM::MVE_VLD21_16};
4716 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
4717 ARM::MVE_VLD21_32};
4718 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
4719 SelectMVE_VLD(N, 2, Opcodes, false);
4720 return;
4721 }
4722
4723 case Intrinsic::arm_mve_vld4q: {
4724 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
4725 ARM::MVE_VLD42_8, ARM::MVE_VLD43_8};
4726 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
4727 ARM::MVE_VLD42_16,
4728 ARM::MVE_VLD43_16};
4729 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
4730 ARM::MVE_VLD42_32,
4731 ARM::MVE_VLD43_32};
4732 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
4733 SelectMVE_VLD(N, 4, Opcodes, false);
4734 return;
4735 }
4736 }
4737 break;
4738 }
4739
4740 case ISD::INTRINSIC_WO_CHAIN: {
4741 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
4742 switch (IntNo) {
4743 default:
4744 break;
4745
4746 // Scalar f32 -> bf16
4747 case Intrinsic::arm_neon_vcvtbfp2bf: {
4748 SDLoc dl(N);
4749 const SDValue &Src = N->getOperand(1);
4750 llvm::EVT DestTy = N->getValueType(0);
4751 SDValue Pred = getAL(CurDAG, dl);
4752 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
4753 SDValue Ops[] = { Src, Src, Pred, Reg0 };
4754 CurDAG->SelectNodeTo(N, ARM::BF16_VCVTB, DestTy, Ops);
4755 return;
4756 }
4757
4758 // Vector v4f32 -> v4bf16
4759 case Intrinsic::arm_neon_vcvtfp2bf: {
4760 SDLoc dl(N);
4761 const SDValue &Src = N->getOperand(1);
4762 SDValue Pred = getAL(CurDAG, dl);
4763 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
4764 SDValue Ops[] = { Src, Pred, Reg0 };
4765 CurDAG->SelectNodeTo(N, ARM::BF16_VCVT, MVT::v4bf16, Ops);
4766 return;
4767 }
4768
4769 case Intrinsic::arm_mve_urshrl:
4770 SelectMVE_LongShift(N, ARM::MVE_URSHRL, true, false);
4771 return;
4772 case Intrinsic::arm_mve_uqshll:
4773 SelectMVE_LongShift(N, ARM::MVE_UQSHLL, true, false);
4774 return;
4775 case Intrinsic::arm_mve_srshrl:
4776 SelectMVE_LongShift(N, ARM::MVE_SRSHRL, true, false);
4777 return;
4778 case Intrinsic::arm_mve_sqshll:
4779 SelectMVE_LongShift(N, ARM::MVE_SQSHLL, true, false);
4780 return;
4781 case Intrinsic::arm_mve_uqrshll:
4782 SelectMVE_LongShift(N, ARM::MVE_UQRSHLL, false, true);
4783 return;
4784 case Intrinsic::arm_mve_sqrshrl:
4785 SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true);
4786 return;
4787
4788 case Intrinsic::arm_mve_vadc:
4789 case Intrinsic::arm_mve_vadc_predicated:
4790 SelectMVE_VADCSBC(N, ARM::MVE_VADC, ARM::MVE_VADCI, true,
4791 IntNo == Intrinsic::arm_mve_vadc_predicated);
4792 return;
4793 case Intrinsic::arm_mve_vsbc:
4794 case Intrinsic::arm_mve_vsbc_predicated:
4795 SelectMVE_VADCSBC(N, ARM::MVE_VSBC, ARM::MVE_VSBCI, true,
4796 IntNo == Intrinsic::arm_mve_vsbc_predicated);
4797 return;
4798 case Intrinsic::arm_mve_vshlc:
4799 case Intrinsic::arm_mve_vshlc_predicated:
4800 SelectMVE_VSHLC(N, IntNo == Intrinsic::arm_mve_vshlc_predicated);
4801 return;
4802
4803 case Intrinsic::arm_mve_vmlldava:
4804 case Intrinsic::arm_mve_vmlldava_predicated: {
4805 static const uint16_t OpcodesU[] = {
4806 ARM::MVE_VMLALDAVu16, ARM::MVE_VMLALDAVu32,
4807 ARM::MVE_VMLALDAVau16, ARM::MVE_VMLALDAVau32,
4808 };
4809 static const uint16_t OpcodesS[] = {
4810 ARM::MVE_VMLALDAVs16, ARM::MVE_VMLALDAVs32,
4811 ARM::MVE_VMLALDAVas16, ARM::MVE_VMLALDAVas32,
4812 ARM::MVE_VMLALDAVxs16, ARM::MVE_VMLALDAVxs32,
4813 ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32,
4814 ARM::MVE_VMLSLDAVs16, ARM::MVE_VMLSLDAVs32,
4815 ARM::MVE_VMLSLDAVas16, ARM::MVE_VMLSLDAVas32,
4816 ARM::MVE_VMLSLDAVxs16, ARM::MVE_VMLSLDAVxs32,
4817 ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32,
4818 };
4819 SelectMVE_VMLLDAV(N, IntNo == Intrinsic::arm_mve_vmlldava_predicated,
4820 OpcodesS, OpcodesU);
4821 return;
4822 }
4823
4824 case Intrinsic::arm_mve_vrmlldavha:
4825 case Intrinsic::arm_mve_vrmlldavha_predicated: {
4826 static const uint16_t OpcodesU[] = {
4827 ARM::MVE_VRMLALDAVHu32, ARM::MVE_VRMLALDAVHau32,
4828 };
4829 static const uint16_t OpcodesS[] = {
4830 ARM::MVE_VRMLALDAVHs32, ARM::MVE_VRMLALDAVHas32,
4831 ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32,
4832 ARM::MVE_VRMLSLDAVHs32, ARM::MVE_VRMLSLDAVHas32,
4833 ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32,
4834 };
4835 SelectMVE_VRMLLDAVH(N, IntNo == Intrinsic::arm_mve_vrmlldavha_predicated,
4836 OpcodesS, OpcodesU);
4837 return;
4838 }
4839
4840 case Intrinsic::arm_mve_vidup:
4841 case Intrinsic::arm_mve_vidup_predicated: {
4842 static const uint16_t Opcodes[] = {
4843 ARM::MVE_VIDUPu8, ARM::MVE_VIDUPu16, ARM::MVE_VIDUPu32,
4844 };
4845 SelectMVE_VxDUP(N, Opcodes, false,
4846 IntNo == Intrinsic::arm_mve_vidup_predicated);
4847 return;
4848 }
4849
4850 case Intrinsic::arm_mve_vddup:
4851 case Intrinsic::arm_mve_vddup_predicated: {
4852 static const uint16_t Opcodes[] = {
4853 ARM::MVE_VDDUPu8, ARM::MVE_VDDUPu16, ARM::MVE_VDDUPu32,
4854 };
4855 SelectMVE_VxDUP(N, Opcodes, false,
4856 IntNo == Intrinsic::arm_mve_vddup_predicated);
4857 return;
4858 }
4859
4860 case Intrinsic::arm_mve_viwdup:
4861 case Intrinsic::arm_mve_viwdup_predicated: {
4862 static const uint16_t Opcodes[] = {
4863 ARM::MVE_VIWDUPu8, ARM::MVE_VIWDUPu16, ARM::MVE_VIWDUPu32,
4864 };
4865 SelectMVE_VxDUP(N, Opcodes, true,
4866 IntNo == Intrinsic::arm_mve_viwdup_predicated);
4867 return;
4868 }
4869
4870 case Intrinsic::arm_mve_vdwdup:
4871 case Intrinsic::arm_mve_vdwdup_predicated: {
4872 static const uint16_t Opcodes[] = {
4873 ARM::MVE_VDWDUPu8, ARM::MVE_VDWDUPu16, ARM::MVE_VDWDUPu32,
4874 };
4875 SelectMVE_VxDUP(N, Opcodes, true,
4876 IntNo == Intrinsic::arm_mve_vdwdup_predicated);
4877 return;
4878 }
4879
4880 case Intrinsic::arm_cde_cx1d:
4881 case Intrinsic::arm_cde_cx1da:
4882 case Intrinsic::arm_cde_cx2d:
4883 case Intrinsic::arm_cde_cx2da:
4884 case Intrinsic::arm_cde_cx3d:
4885 case Intrinsic::arm_cde_cx3da: {
4886 bool HasAccum = IntNo == Intrinsic::arm_cde_cx1da ||
4887 IntNo == Intrinsic::arm_cde_cx2da ||
4888 IntNo == Intrinsic::arm_cde_cx3da;
4889 size_t NumExtraOps;
4890 uint16_t Opcode;
4891 switch (IntNo) {
4892 case Intrinsic::arm_cde_cx1d:
4893 case Intrinsic::arm_cde_cx1da:
4894 NumExtraOps = 0;
4895 Opcode = HasAccum ? ARM::CDE_CX1DA : ARM::CDE_CX1D;
4896 break;
4897 case Intrinsic::arm_cde_cx2d:
4898 case Intrinsic::arm_cde_cx2da:
4899 NumExtraOps = 1;
4900 Opcode = HasAccum ? ARM::CDE_CX2DA : ARM::CDE_CX2D;
4901 break;
4902 case Intrinsic::arm_cde_cx3d:
4903 case Intrinsic::arm_cde_cx3da:
4904 NumExtraOps = 2;
4905 Opcode = HasAccum ? ARM::CDE_CX3DA : ARM::CDE_CX3D;
4906 break;
4907 default:
4908 llvm_unreachable("Unexpected opcode");
4909 }
4910 SelectCDE_CXxD(N, Opcode, NumExtraOps, HasAccum);
4911 return;
4912 }
4913 }
4914 break;
4915 }
4916
4917 case ISD::ATOMIC_CMP_SWAP:
4918 SelectCMP_SWAP(N);
4919 return;
4920 }
4921
4922 SelectCode(N);
4923 }
4924
4925 // Inspect a register string of the form
4926 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
4927 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
4928 // and obtain the integer operands from them, adding these operands to the
4929 // provided vector.
getIntOperandsFromRegisterString(StringRef RegString,SelectionDAG * CurDAG,const SDLoc & DL,std::vector<SDValue> & Ops)4930 static void getIntOperandsFromRegisterString(StringRef RegString,
4931 SelectionDAG *CurDAG,
4932 const SDLoc &DL,
4933 std::vector<SDValue> &Ops) {
4934 SmallVector<StringRef, 5> Fields;
4935 RegString.split(Fields, ':');
4936
4937 if (Fields.size() > 1) {
4938 bool AllIntFields = true;
4939
4940 for (StringRef Field : Fields) {
4941 // Need to trim out leading 'cp' characters and get the integer field.
4942 unsigned IntField;
4943 AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
4944 Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
4945 }
4946
4947 assert(AllIntFields &&
4948 "Unexpected non-integer value in special register string.");
4949 }
4950 }
4951
4952 // Maps a Banked Register string to its mask value. The mask value returned is
4953 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
4954 // mask operand, which expresses which register is to be used, e.g. r8, and in
4955 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string
4956 // was invalid.
getBankedRegisterMask(StringRef RegString)4957 static inline int getBankedRegisterMask(StringRef RegString) {
4958 auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
4959 if (!TheReg)
4960 return -1;
4961 return TheReg->Encoding;
4962 }
4963
4964 // The flags here are common to those allowed for apsr in the A class cores and
4965 // those allowed for the special registers in the M class cores. Returns a
4966 // value representing which flags were present, -1 if invalid.
getMClassFlagsMask(StringRef Flags)4967 static inline int getMClassFlagsMask(StringRef Flags) {
4968 return StringSwitch<int>(Flags)
4969 .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
4970 // correct when flags are not permitted
4971 .Case("g", 0x1)
4972 .Case("nzcvq", 0x2)
4973 .Case("nzcvqg", 0x3)
4974 .Default(-1);
4975 }
4976
4977 // Maps MClass special registers string to its value for use in the
4978 // t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
4979 // Returns -1 to signify that the string was invalid.
getMClassRegisterMask(StringRef Reg,const ARMSubtarget * Subtarget)4980 static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
4981 auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
4982 const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
4983 if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
4984 return -1;
4985 return (int)(TheReg->Encoding & 0xFFF); // SYSm value
4986 }
4987
getARClassRegisterMask(StringRef Reg,StringRef Flags)4988 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
4989 // The mask operand contains the special register (R Bit) in bit 4, whether
4990 // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
4991 // bits 3-0 contains the fields to be accessed in the special register, set by
4992 // the flags provided with the register.
4993 int Mask = 0;
4994 if (Reg == "apsr") {
4995 // The flags permitted for apsr are the same flags that are allowed in
4996 // M class registers. We get the flag value and then shift the flags into
4997 // the correct place to combine with the mask.
4998 Mask = getMClassFlagsMask(Flags);
4999 if (Mask == -1)
5000 return -1;
5001 return Mask << 2;
5002 }
5003
5004 if (Reg != "cpsr" && Reg != "spsr") {
5005 return -1;
5006 }
5007
5008 // This is the same as if the flags were "fc"
5009 if (Flags.empty() || Flags == "all")
5010 return Mask | 0x9;
5011
5012 // Inspect the supplied flags string and set the bits in the mask for
5013 // the relevant and valid flags allowed for cpsr and spsr.
5014 for (char Flag : Flags) {
5015 int FlagVal;
5016 switch (Flag) {
5017 case 'c':
5018 FlagVal = 0x1;
5019 break;
5020 case 'x':
5021 FlagVal = 0x2;
5022 break;
5023 case 's':
5024 FlagVal = 0x4;
5025 break;
5026 case 'f':
5027 FlagVal = 0x8;
5028 break;
5029 default:
5030 FlagVal = 0;
5031 }
5032
5033 // This avoids allowing strings where the same flag bit appears twice.
5034 if (!FlagVal || (Mask & FlagVal))
5035 return -1;
5036 Mask |= FlagVal;
5037 }
5038
5039 // If the register is spsr then we need to set the R bit.
5040 if (Reg == "spsr")
5041 Mask |= 0x10;
5042
5043 return Mask;
5044 }
5045
5046 // Lower the read_register intrinsic to ARM specific DAG nodes
5047 // using the supplied metadata string to select the instruction node to use
5048 // and the registers/masks to construct as operands for the node.
tryReadRegister(SDNode * N)5049 bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
5050 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
5051 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
5052 bool IsThumb2 = Subtarget->isThumb2();
5053 SDLoc DL(N);
5054
5055 std::vector<SDValue> Ops;
5056 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
5057
5058 if (!Ops.empty()) {
5059 // If the special register string was constructed of fields (as defined
5060 // in the ACLE) then need to lower to MRC node (32 bit) or
5061 // MRRC node(64 bit), we can make the distinction based on the number of
5062 // operands we have.
5063 unsigned Opcode;
5064 SmallVector<EVT, 3> ResTypes;
5065 if (Ops.size() == 5){
5066 Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
5067 ResTypes.append({ MVT::i32, MVT::Other });
5068 } else {
5069 assert(Ops.size() == 3 &&
5070 "Invalid number of fields in special register string.");
5071 Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
5072 ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
5073 }
5074
5075 Ops.push_back(getAL(CurDAG, DL));
5076 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
5077 Ops.push_back(N->getOperand(0));
5078 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
5079 return true;
5080 }
5081
5082 std::string SpecialReg = RegString->getString().lower();
5083
5084 int BankedReg = getBankedRegisterMask(SpecialReg);
5085 if (BankedReg != -1) {
5086 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
5087 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5088 N->getOperand(0) };
5089 ReplaceNode(
5090 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
5091 DL, MVT::i32, MVT::Other, Ops));
5092 return true;
5093 }
5094
5095 // The VFP registers are read by creating SelectionDAG nodes with opcodes
5096 // corresponding to the register that is being read from. So we switch on the
5097 // string to find which opcode we need to use.
5098 unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
5099 .Case("fpscr", ARM::VMRS)
5100 .Case("fpexc", ARM::VMRS_FPEXC)
5101 .Case("fpsid", ARM::VMRS_FPSID)
5102 .Case("mvfr0", ARM::VMRS_MVFR0)
5103 .Case("mvfr1", ARM::VMRS_MVFR1)
5104 .Case("mvfr2", ARM::VMRS_MVFR2)
5105 .Case("fpinst", ARM::VMRS_FPINST)
5106 .Case("fpinst2", ARM::VMRS_FPINST2)
5107 .Default(0);
5108
5109 // If an opcode was found then we can lower the read to a VFP instruction.
5110 if (Opcode) {
5111 if (!Subtarget->hasVFP2Base())
5112 return false;
5113 if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
5114 return false;
5115
5116 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5117 N->getOperand(0) };
5118 ReplaceNode(N,
5119 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
5120 return true;
5121 }
5122
5123 // If the target is M Class then need to validate that the register string
5124 // is an acceptable value, so check that a mask can be constructed from the
5125 // string.
5126 if (Subtarget->isMClass()) {
5127 int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
5128 if (SYSmValue == -1)
5129 return false;
5130
5131 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
5132 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5133 N->getOperand(0) };
5134 ReplaceNode(
5135 N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
5136 return true;
5137 }
5138
5139 // Here we know the target is not M Class so we need to check if it is one
5140 // of the remaining possible values which are apsr, cpsr or spsr.
5141 if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
5142 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5143 N->getOperand(0) };
5144 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
5145 DL, MVT::i32, MVT::Other, Ops));
5146 return true;
5147 }
5148
5149 if (SpecialReg == "spsr") {
5150 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5151 N->getOperand(0) };
5152 ReplaceNode(
5153 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
5154 MVT::i32, MVT::Other, Ops));
5155 return true;
5156 }
5157
5158 return false;
5159 }
5160
5161 // Lower the write_register intrinsic to ARM specific DAG nodes
5162 // using the supplied metadata string to select the instruction node to use
5163 // and the registers/masks to use in the nodes
tryWriteRegister(SDNode * N)5164 bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
5165 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
5166 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
5167 bool IsThumb2 = Subtarget->isThumb2();
5168 SDLoc DL(N);
5169
5170 std::vector<SDValue> Ops;
5171 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
5172
5173 if (!Ops.empty()) {
5174 // If the special register string was constructed of fields (as defined
5175 // in the ACLE) then need to lower to MCR node (32 bit) or
5176 // MCRR node(64 bit), we can make the distinction based on the number of
5177 // operands we have.
5178 unsigned Opcode;
5179 if (Ops.size() == 5) {
5180 Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
5181 Ops.insert(Ops.begin()+2, N->getOperand(2));
5182 } else {
5183 assert(Ops.size() == 3 &&
5184 "Invalid number of fields in special register string.");
5185 Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
5186 SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
5187 Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
5188 }
5189
5190 Ops.push_back(getAL(CurDAG, DL));
5191 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
5192 Ops.push_back(N->getOperand(0));
5193
5194 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
5195 return true;
5196 }
5197
5198 std::string SpecialReg = RegString->getString().lower();
5199 int BankedReg = getBankedRegisterMask(SpecialReg);
5200 if (BankedReg != -1) {
5201 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
5202 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5203 N->getOperand(0) };
5204 ReplaceNode(
5205 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
5206 DL, MVT::Other, Ops));
5207 return true;
5208 }
5209
5210 // The VFP registers are written to by creating SelectionDAG nodes with
5211 // opcodes corresponding to the register that is being written. So we switch
5212 // on the string to find which opcode we need to use.
5213 unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
5214 .Case("fpscr", ARM::VMSR)
5215 .Case("fpexc", ARM::VMSR_FPEXC)
5216 .Case("fpsid", ARM::VMSR_FPSID)
5217 .Case("fpinst", ARM::VMSR_FPINST)
5218 .Case("fpinst2", ARM::VMSR_FPINST2)
5219 .Default(0);
5220
5221 if (Opcode) {
5222 if (!Subtarget->hasVFP2Base())
5223 return false;
5224 Ops = { N->getOperand(2), getAL(CurDAG, DL),
5225 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
5226 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
5227 return true;
5228 }
5229
5230 std::pair<StringRef, StringRef> Fields;
5231 Fields = StringRef(SpecialReg).rsplit('_');
5232 std::string Reg = Fields.first.str();
5233 StringRef Flags = Fields.second;
5234
5235 // If the target was M Class then need to validate the special register value
5236 // and retrieve the mask for use in the instruction node.
5237 if (Subtarget->isMClass()) {
5238 int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
5239 if (SYSmValue == -1)
5240 return false;
5241
5242 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
5243 N->getOperand(2), getAL(CurDAG, DL),
5244 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
5245 ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
5246 return true;
5247 }
5248
5249 // We then check to see if a valid mask can be constructed for one of the
5250 // register string values permitted for the A and R class cores. These values
5251 // are apsr, spsr and cpsr; these are also valid on older cores.
5252 int Mask = getARClassRegisterMask(Reg, Flags);
5253 if (Mask != -1) {
5254 Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
5255 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5256 N->getOperand(0) };
5257 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
5258 DL, MVT::Other, Ops));
5259 return true;
5260 }
5261
5262 return false;
5263 }
5264
// Rewrite an INLINEASM(_BR) node so that 64-bit "%r"-constrained values that
// occupy two i32 GPR operands are replaced with a single GPRPair operand,
// inserting the CopyFromReg/CopyToReg/REG_SEQUENCE glue needed to move the
// values between the pair and the original GPRs. Returns true (and replaces
// N) if any operand was rewritten, false to leave the node untouched.
bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
  std::vector<SDValue> AsmNodeOperands;
  unsigned Flag, Kind;
  bool Changed = false;
  unsigned NumOps = N->getNumOperands();

  // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.
  // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require
  // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
  // respectively. Since there is no constraint to explicitly specify a
  // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
  // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
  // them into a GPRPair.

  SDLoc dl(N);
  // If the node is glued, its last operand is the glue value; it is handled
  // separately and re-appended after the loop.
  SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
                                   : SDValue(nullptr,0);

  // Tracks, per register operand group, whether it was converted to a
  // GPRPair; indexed by DefIdx for tied-use lookups below.
  SmallVector<bool, 8> OpChanged;
  // Glue node will be appended late.
  for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
    SDValue op = N->getOperand(i);
    AsmNodeOperands.push_back(op);

    // Skip the fixed leading operands (chain, asm string, etc.).
    if (i < InlineAsm::Op_FirstOperand)
      continue;

    // Only constant operands encode inline-asm flag words; anything else is
    // copied through unchanged.
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
      Flag = C->getZExtValue();
      Kind = InlineAsm::getKind(Flag);
    }
    else
      continue;

    // Immediate operands to inline asm in the SelectionDAG are modeled with
    // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
    // the second is a constant with the value of the immediate. If we get here
    // and we have a Kind_Imm, skip the next operand, and continue.
    if (Kind == InlineAsm::Kind_Imm) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
    if (NumRegs)
      OpChanged.push_back(false);

    unsigned DefIdx = 0;
    bool IsTiedToChangedOp = false;
    // If it's a use that is tied with a previous def, it has no
    // reg class constraint.
    if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
      IsTiedToChangedOp = OpChanged[DefIdx];

    // Memory operands to inline asm in the SelectionDAG are modeled with two
    // operands: a constant of value InlineAsm::Kind_Mem followed by the input
    // operand. If we get here and we have a Kind_Mem, skip the next operand (so
    // it doesn't get misinterpreted), and continue. We do this here because
    // it's important to update the OpChanged array correctly before moving on.
    if (Kind == InlineAsm::Kind_Mem) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
        && Kind != InlineAsm::Kind_RegDefEarlyClobber)
      continue;

    // Only rewrite exactly-two-register GPR-class groups (i.e. 64-bit "%r"
    // values), or uses tied to a def that was already rewritten.
    unsigned RC;
    bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
    if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
        || NumRegs != 2)
      continue;

    assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
    SDValue V0 = N->getOperand(i+1);
    SDValue V1 = N->getOperand(i+2);
    unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
    unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
    SDValue PairedReg;
    MachineRegisterInfo &MRI = MF->getRegInfo();

    if (Kind == InlineAsm::Kind_RegDef ||
        Kind == InlineAsm::Kind_RegDefEarlyClobber) {
      // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
      // the original GPRs.

      Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      SDValue Chain = SDValue(N,0);

      SDNode *GU = N->getGluedUser();
      SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
                                               Chain.getValue(1));

      // Extract values from a GPRPair reg and copy to the original GPR reg.
      SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
                                                    RegCopy);
      SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
                                                    RegCopy);
      SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
                                        RegCopy.getValue(1));
      SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));

      // Update the original glue user.
      std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
      Ops.push_back(T1.getValue(1));
      CurDAG->UpdateNodeOperands(GU, Ops);
    }
    else {
      // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
      // GPRPair and then pass the GPRPair to the inline asm.
      SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];

      // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
      SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
                                          Chain.getValue(1));
      SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
                                          T0.getValue(1));
      SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);

      // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
      // i32 VRs of inline asm with it.
      Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));

      AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
      Glue = Chain.getValue(1);
    }

    Changed = true;

    if(PairedReg.getNode()) {
      OpChanged[OpChanged.size() -1 ] = true;
      Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
      if (IsTiedToChangedOp)
        Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
      else
        Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
      // Replace the current flag.
      AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
          Flag, dl, MVT::i32);
      // Add the new register node and skip the original two GPRs.
      AsmNodeOperands.push_back(PairedReg);
      // Skip the next two GPRs.
      i += 2;
    }
  }

  if (Glue.getNode())
    AsmNodeOperands.push_back(Glue);
  if (!Changed)
    return false;

  // Rebuild the inline-asm node with the rewritten operand list.
  SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
      CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
  New->setNodeId(-1);
  ReplaceNode(N, New.getNode());
  return true;
}
5428
5429
5430 bool ARMDAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDValue & Op,unsigned ConstraintID,std::vector<SDValue> & OutOps)5431 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
5432 std::vector<SDValue> &OutOps) {
5433 switch(ConstraintID) {
5434 default:
5435 llvm_unreachable("Unexpected asm memory constraint");
5436 case InlineAsm::Constraint_m:
5437 case InlineAsm::Constraint_o:
5438 case InlineAsm::Constraint_Q:
5439 case InlineAsm::Constraint_Um:
5440 case InlineAsm::Constraint_Un:
5441 case InlineAsm::Constraint_Uq:
5442 case InlineAsm::Constraint_Us:
5443 case InlineAsm::Constraint_Ut:
5444 case InlineAsm::Constraint_Uv:
5445 case InlineAsm::Constraint_Uy:
5446 // Require the address to be in a register. That is safe for all ARM
5447 // variants and it is hard to do anything much smarter without knowing
5448 // how the operand is used.
5449 OutOps.push_back(Op);
5450 return false;
5451 }
5452 return true;
5453 }
5454
5455 /// createARMISelDag - This pass converts a legalized DAG into a
5456 /// ARM-specific DAG, ready for instruction scheduling.
5457 ///
createARMISelDag(ARMBaseTargetMachine & TM,CodeGenOpt::Level OptLevel)5458 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
5459 CodeGenOpt::Level OptLevel) {
5460 return new ARMDAGToDAGISel(TM, OptLevel);
5461 }
5462