//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64GlobalISelUtils.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "aarch64-isel"

using namespace llvm;
using namespace MIPatternMatch;
using namespace AArch64GISelUtils;

namespace llvm {
class BlockFrequencyInfo;
class ProfileSummaryInfo;
}

namespace {

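// Pull in the tblgen-erated bitset type that records which subtarget
// predicates the imported selection patterns test.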
#define GET_GLOBALISEL_PREDICATE_BITSET
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET

class AArch64InstructionSelector : public InstructionSelector {
public:
  AArch64InstructionSelector(const AArch64TargetMachine &TM,
                             const AArch64Subtarget &STI,
                             const AArch64RegisterBankInfo &RBI);

  bool select(MachineInstr &I) override;
  static const char *getName() { return DEBUG_TYPE; }

  void setupMF(MachineFunction &MF, GISelKnownBits *KB,
               CodeGenCoverage &CoverageInfo, ProfileSummaryInfo *PSI,
               BlockFrequencyInfo *BFI) override {
    InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
    MIB.setMF(MF);

    // hasFnAttribute() is expensive to call on every BRCOND selection, so
    // cache it here for each run of the selector.
    ProduceNonFlagSettingCondBr =
        !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
    MFReturnAddr = Register();

    processPHIs(MF);
  }

private:
  /// tblgen-erated 'select' implementation, used as the initial selector for
  /// the patterns that don't require complex C++.
  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;

  // A lowering phase that runs before any selection attempts.
  // Returns true if the instruction was modified.
  bool preISelLower(MachineInstr &I);

  // An early selection function that runs before the selectImpl() call.
  bool earlySelect(MachineInstr &I);

  // Do some preprocessing of G_PHIs before we begin selection.
  void processPHIs(MachineFunction &MF);

  bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);

  /// Eliminate same-sized cross-bank copies into stores before selectImpl().
  bool contractCrossBankCopyIntoStore(MachineInstr &I,
                                      MachineRegisterInfo &MRI);

  bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);

  bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
                          MachineRegisterInfo &MRI) const;
  bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  ///@{
  /// Helper functions for selectCompareBranch.
  bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
                                    MachineIRBuilder &MIB) const;
  bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
                                    MachineIRBuilder &MIB) const;
  bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
                                    MachineIRBuilder &MIB) const;
  bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
                                  MachineBasicBlock *DstMBB,
                                  MachineIRBuilder &MIB) const;
  ///@}

  bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI);

  bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);

  // Helper to generate an equivalent of scalar_to_vector into a new register,
  // returned via 'Dst'.
  MachineInstr *emitScalarToVector(unsigned EltSize,
                                   const TargetRegisterClass *DstRC,
                                   Register Scalar,
                                   MachineIRBuilder &MIRBuilder) const;

  /// Emit a lane insert into \p DstReg, or a new vector register if None is
  /// provided.
  ///
  /// The lane inserted into is defined by \p LaneIdx. The vector source
  /// register is given by \p SrcReg. The register containing the element is
  /// given by \p EltReg.
  MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
                               Register EltReg, unsigned LaneIdx,
                               const RegisterBank &RB,
                               MachineIRBuilder &MIRBuilder) const;

  /// Emit a sequence of instructions representing a constant \p CV for a
  /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
  ///
  /// \returns the last instruction in the sequence on success, and nullptr
  /// otherwise.
  MachineInstr *emitConstantVector(Register Dst, Constant *CV,
                                   MachineIRBuilder &MIRBuilder,
                                   MachineRegisterInfo &MRI);

  bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI);
  bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
                              MachineRegisterInfo &MRI);
  /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
  /// SUBREG_TO_REG.
  bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
  bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI);

  bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);

  /// Helper function to select vector load intrinsics like
  /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
  /// \p Opc is the opcode that the selected instruction should use.
  /// \p NumVecs is the number of vector destinations for the instruction.
  /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
  bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
                                 MachineInstr &I);
  bool selectIntrinsicWithSideEffects(MachineInstr &I,
                                      MachineRegisterInfo &MRI);
  bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);

  unsigned emitConstantPoolEntry(const Constant *CPVal,
                                 MachineFunction &MF) const;
  MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
                                         MachineIRBuilder &MIRBuilder) const;

  // Emit a vector concat operation.
  MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
                                 Register Op2,
                                 MachineIRBuilder &MIRBuilder) const;

  // Emit an integer compare between LHS and RHS, which checks for Predicate.
  MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
                                   MachineOperand &Predicate,
                                   MachineIRBuilder &MIRBuilder) const;

  /// Emit a floating point comparison between \p LHS and \p RHS.
  /// \p Pred if given is the intended predicate to use.
  MachineInstr *emitFPCompare(Register LHS, Register RHS,
                              MachineIRBuilder &MIRBuilder,
                              Optional<CmpInst::Predicate> Pred = None) const;

  MachineInstr *emitInstr(unsigned Opcode,
                          std::initializer_list<llvm::DstOp> DstOps,
                          std::initializer_list<llvm::SrcOp> SrcOps,
                          MachineIRBuilder &MIRBuilder,
                          const ComplexRendererFns &RenderFns = None) const;
  /// Helper function to emit an add or sub instruction.
  ///
  /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants below
  /// in a specific order.
  ///
  /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
  ///
  /// \code
  ///   const std::array<std::array<unsigned, 2>, 5> Table {
  ///    {{AArch64::ADDXri, AArch64::ADDWri},
  ///     {AArch64::ADDXrs, AArch64::ADDWrs},
  ///     {AArch64::ADDXrr, AArch64::ADDWrr},
  ///     {AArch64::SUBXri, AArch64::SUBWri},
  ///     {AArch64::ADDXrx, AArch64::ADDWrx}}};
  /// \endcode
  ///
  /// Each row in the table corresponds to a different addressing mode. Each
  /// column corresponds to a different register size.
  ///
  /// \attention Rows must be structured as follows:
  ///   - Row 0: The ri opcode variants
  ///   - Row 1: The rs opcode variants
  ///   - Row 2: The rr opcode variants
  ///   - Row 3: The ri opcode variants for negative immediates
  ///   - Row 4: The rx opcode variants
  ///
  /// \attention Columns must be structured as follows:
  ///   - Column 0: The 64-bit opcode variants
  ///   - Column 1: The 32-bit opcode variants
  ///
  /// \p Dst is the destination register of the binop to emit.
  /// \p LHS is the left-hand operand of the binop to emit.
  /// \p RHS is the right-hand operand of the binop to emit.
  MachineInstr *emitAddSub(
      const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
      Register Dst, MachineOperand &LHS, MachineOperand &RHS,
      MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
                        MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
                         MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
                         MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
                           AArch64CC::CondCode CC,
                           MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
                                     const RegisterBank &DstRB, LLT ScalarTy,
                                     Register VecReg, unsigned LaneIdx,
                                     MachineIRBuilder &MIRBuilder) const;

  /// Emit a CSet for an integer compare.
  ///
  /// \p DefReg and \p SrcReg are expected to be 32-bit scalar registers.
  MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
                                MachineIRBuilder &MIRBuilder,
                                Register SrcReg = AArch64::WZR) const;
  /// Emit a CSet for a FP compare.
  ///
  /// \p Dst is expected to be a 32-bit scalar register.
  MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
                                MachineIRBuilder &MIRBuilder) const;

  /// Emit the overflow op for \p Opcode.
  ///
  /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
  /// G_USUBO, etc.
  std::pair<MachineInstr *, AArch64CC::CondCode>
  emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
                 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;

  /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
  /// \p IsNegative is true if the test should be "not zero".
  /// This will also optimize the test bit instruction when possible.
  MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
                            MachineBasicBlock *DstMBB,
                            MachineIRBuilder &MIB) const;

  /// Emit a CB(N)Z instruction which branches to \p DestMBB.
  MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
                        MachineBasicBlock *DestMBB,
                        MachineIRBuilder &MIB) const;

  // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
  // We use these manually instead of using the importer since it doesn't
  // support SDNodeXForm.
  ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;

  ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
  ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
  ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;

  ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
                                            unsigned Size) const;

  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 1);
  }
  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 2);
  }
  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 4);
  }
  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 8);
  }
  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 16);
  }

  /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
  /// from complex pattern matchers like selectAddrModeIndexed().
  ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
                                          MachineRegisterInfo &MRI) const;

  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
                                           unsigned Size) const;
  template <int Width>
  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
    return selectAddrModeIndexed(Root, Width / 8);
  }

  bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
                                     const MachineRegisterInfo &MRI) const;
  ComplexRendererFns
  selectAddrModeShiftedExtendXReg(MachineOperand &Root,
                                  unsigned SizeInBytes) const;

  /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
  /// or not a shift + extend should be folded into an addressing mode. Returns
  /// None when this is not profitable or possible.
  ComplexRendererFns
  selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
                    MachineOperand &Offset, unsigned SizeInBytes,
                    bool WantsExt) const;
  ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
                                       unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
    return selectAddrModeXRO(Root, Width / 8);
  }

  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
                                       unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
    return selectAddrModeWRO(Root, Width / 8);
  }

  ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
                                           bool AllowROR = false) const;

  ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
    return selectShiftedRegister(Root);
  }

  ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
    return selectShiftedRegister(Root, true);
  }

  /// Given an extend instruction, determine the correct shift-extend type for
  /// that instruction.
  ///
  /// If the instruction is going to be used in a load or store, pass
  /// \p IsLoadStore = true.
  AArch64_AM::ShiftExtendType
  getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
                       bool IsLoadStore = false) const;

  /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
  ///
  /// \returns Either \p Reg if no change was necessary, or the new register
  /// created by moving \p Reg.
  ///
  /// Note: This uses emitCopy right now.
  Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
                              MachineIRBuilder &MIB) const;

  ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;

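  // Custom renderers referenced from the tblgen-imported patterns. Each one
  // appends a rendered immediate operand to the instruction being built.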
  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
                      int OpIdx = -1) const;
  void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
                          int OpIdx = -1) const;
  void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
                          int OpIdx = -1) const;
  void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
                     int OpIdx = -1) const;
  void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
                     int OpIdx = -1) const;
  void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
                     int OpIdx = -1) const;

  // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
  void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);

  // Optimization methods.
  bool tryOptSelect(MachineInstr &MI);
  MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
                                      MachineOperand &Predicate,
                                      MachineIRBuilder &MIRBuilder) const;

  /// Return true if \p MI is a load or store of \p NumBytes bytes.
  bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;

  /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
  /// register zeroed out. In other words, the result of MI has been explicitly
  /// zero extended.
  bool isDef32(const MachineInstr &MI) const;

  const AArch64TargetMachine &TM;
  const AArch64Subtarget &STI;
  const AArch64InstrInfo &TII;
  const AArch64RegisterInfo &TRI;
  const AArch64RegisterBankInfo &RBI;

  bool ProduceNonFlagSettingCondBr = false;

  // Some cached values used during selection.
  // We use LR as a live-in register, and we keep track of it here as it can be
  // clobbered by calls.
  Register MFReturnAddr;

  MachineIRBuilder MIB;

#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

  // We declare the temporaries used by selectImpl() in the class to minimize
  // the cost of constructing placeholder values.
#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL
};

} // end anonymous namespace

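// Pull in the tblgen-erated selectImpl() implementation and its supporting
// match tables.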
#define GET_GLOBALISEL_IMPL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

AArch64InstructionSelector::AArch64InstructionSelector(
    const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
    const AArch64RegisterBankInfo &RBI)
    : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

// FIXME: This should be target-independent, inferred from the types declared
// for each class in the bank.
static const TargetRegisterClass *
getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
                         const RegisterBankInfo &RBI,
                         bool GetAllRegSet = false) {
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (Ty.getSizeInBits() <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
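    // Note: 128-bit values on the GPR bank are held in XSeqPairs register
    // pairs (e.g. for 128-bit atomics such as CASP).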
    if (Ty.getSizeInBits() == 128)
      return &AArch64::XSeqPairsClassRegClass;
    return nullptr;
  }

  if (RB.getID() == AArch64::FPRRegBankID) {
    switch (Ty.getSizeInBits()) {
    case 8:
      return &AArch64::FPR8RegClass;
    case 16:
      return &AArch64::FPR16RegClass;
    case 32:
      return &AArch64::FPR32RegClass;
    case 64:
      return &AArch64::FPR64RegClass;
    case 128:
      return &AArch64::FPR128RegClass;
    }
    return nullptr;
  }

  return nullptr;
}

/// Given a register bank, and size in bits, return the smallest register class
/// that can represent that combination.
static const TargetRegisterClass *
getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
                      bool GetAllRegSet = false) {
  unsigned RegBankID = RB.getID();

  if (RegBankID == AArch64::GPRRegBankID) {
    if (SizeInBits <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (SizeInBits == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
    if (SizeInBits == 128)
      return &AArch64::XSeqPairsClassRegClass;
  }

  if (RegBankID == AArch64::FPRRegBankID) {
    switch (SizeInBits) {
    default:
      return nullptr;
    case 8:
      return &AArch64::FPR8RegClass;
    case 16:
      return &AArch64::FPR16RegClass;
    case 32:
      return &AArch64::FPR32RegClass;
    case 64:
      return &AArch64::FPR64RegClass;
    case 128:
      return &AArch64::FPR128RegClass;
    }
  }

  return nullptr;
}

/// Returns the correct subregister to use for a given register class.
static bool getSubRegForClass(const TargetRegisterClass *RC,
                              const TargetRegisterInfo &TRI, unsigned &SubReg) {
  switch (TRI.getRegSizeInBits(*RC)) {
  case 8:
    SubReg = AArch64::bsub;
    break;
  case 16:
    SubReg = AArch64::hsub;
    break;
  case 32:
    if (RC != &AArch64::FPR32RegClass)
      SubReg = AArch64::sub_32;
    else
      SubReg = AArch64::ssub;
    break;
  case 64:
    SubReg = AArch64::dsub;
    break;
  default:
    LLVM_DEBUG(
        dbgs() << "Couldn't find appropriate subregister for register class.");
    return false;
  }

  return true;
}

/// Returns the minimum size the given register bank can hold.
static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
  switch (RB.getID()) {
  case AArch64::GPRRegBankID:
    return 32;
  case AArch64::FPRRegBankID:
    return 8;
  default:
    llvm_unreachable("Tried to get minimum size for unknown register bank.");
  }
}

/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
/// Helper function for functions like createDTuple and createQTuple.
///
/// \p RegClassIDs - The list of register class IDs available for some tuple of
/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
/// expected to contain between 2 and 4 tuple classes.
///
/// \p SubRegs - The list of subregister classes associated with each register
/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
/// subregister class. The index of each subregister class is expected to
/// correspond with the index of each register class.
///
/// \returns Either the destination register of the REG_SEQUENCE instruction
/// that was created, or the 0th element of \p Regs if \p Regs contains a
/// single element.
static Register createTuple(ArrayRef<Register> Regs,
                            const unsigned RegClassIDs[],
                            const unsigned SubRegs[], MachineIRBuilder &MIB) {
  unsigned NumRegs = Regs.size();
  if (NumRegs == 1)
    return Regs[0];
  assert(NumRegs >= 2 && NumRegs <= 4 &&
         "Only support between two and four registers in a tuple!");
  const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
  auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
  auto RegSequence =
      MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
  for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
    RegSequence.addUse(Regs[I]);
    RegSequence.addImm(SubRegs[I]);
  }
  return RegSequence.getReg(0);
}

/// Create a tuple of D-registers using the registers in \p Regs.
static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
  static const unsigned RegClassIDs[] = {
      AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
                                     AArch64::dsub2, AArch64::dsub3};
  return createTuple(Regs, RegClassIDs, SubRegs, MIB);
}

/// Create a tuple of Q-registers using the registers in \p Regs.
static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
  static const unsigned RegClassIDs[] = {
      AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
                                     AArch64::qsub2, AArch64::qsub3};
  return createTuple(Regs, RegClassIDs, SubRegs, MIB);
}

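/// Extract a constant immediate from \p Root, which may be an immediate
/// operand, a ConstantInt operand, or a vreg defined by a constant (looking
/// through copies). Returns None if no constant can be extracted.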
static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
  auto &MI = *Root.getParent();
  auto &MBB = *MI.getParent();
  auto &MF = *MBB.getParent();
  auto &MRI = MF.getRegInfo();
  uint64_t Immed;
  if (Root.isImm())
    Immed = Root.getImm();
  else if (Root.isCImm())
    Immed = Root.getCImm()->getZExtValue();
  else if (Root.isReg()) {
    auto ValAndVReg =
        getIConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
    if (!ValAndVReg)
      return None;
    Immed = ValAndVReg->Value.getSExtValue();
  } else
    return None;
  return Immed;
}

/// Check whether \p I is a currently unsupported binary operation:
/// - it has an unsized type
/// - an operand is not a vreg
/// - its operands are not all on the same register bank
/// These are checks that should someday live in the verifier, but right now,
/// these are mostly limitations of the aarch64 selector.
static bool unsupportedBinOp(const MachineInstr &I,
                             const AArch64RegisterBankInfo &RBI,
                             const MachineRegisterInfo &MRI,
                             const AArch64RegisterInfo &TRI) {
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  if (!Ty.isValid()) {
    LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
    return true;
  }

  const RegisterBank *PrevOpBank = nullptr;
  for (auto &MO : I.operands()) {
    // FIXME: Support non-register operands.
    if (!MO.isReg()) {
      LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
      return true;
    }

    // FIXME: Can generic operations have physical register operands? If
    // so, this will need to be taught about that, and we'll need to get the
    // bank out of the minimal class for the register.
    // Either way, this needs to be documented (and possibly verified).
    if (!Register::isVirtualRegister(MO.getReg())) {
      LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
      return true;
    }

    const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
    if (!OpBank) {
      LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
      return true;
    }

    if (PrevOpBank && OpBank != PrevOpBank) {
      LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
      return true;
    }
    PrevOpBank = OpBank;
  }
  return false;
}

/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
/// and of size \p OpSize.
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
                               unsigned OpSize) {
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    if (OpSize == 32) {
      switch (GenericOpc) {
      case TargetOpcode::G_SHL:
        return AArch64::LSLVWr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVWr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVWr;
      default:
        return GenericOpc;
      }
    } else if (OpSize == 64) {
      switch (GenericOpc) {
      case TargetOpcode::G_PTR_ADD:
        return AArch64::ADDXrr;
      case TargetOpcode::G_SHL:
        return AArch64::LSLVXr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVXr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVXr;
      default:
        return GenericOpc;
      }
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDSrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBSrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULSrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVSrr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDDrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBDrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULDrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVDrr;
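      // There is no scalar FPR logical instruction, so a 64-bit G_OR on the
      // FPR bank is selected as a vector ORR on the d-register (ORRv8i8).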
      case TargetOpcode::G_OR:
        return AArch64::ORRv8i8;
      default:
        return GenericOpc;
      }
    }
    break;
  }
  return GenericOpc;
}

/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
/// appropriate for the (value) register bank \p RegBankID and of memory access
/// size \p OpSize. This returns the variant with the base+unsigned-immediate
/// addressing mode (e.g., LDRXui).
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
                                    unsigned OpSize) {
  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
    case 16:
      return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
    case 32:
      return isStore ? AArch64::STRWui : AArch64::LDRWui;
    case 64:
      return isStore ? AArch64::STRXui : AArch64::LDRXui;
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBui : AArch64::LDRBui;
    case 16:
      return isStore ? AArch64::STRHui : AArch64::LDRHui;
    case 32:
      return isStore ? AArch64::STRSui : AArch64::LDRSui;
    case 64:
      return isStore ? AArch64::STRDui : AArch64::LDRDui;
    case 128:
      return isStore ? AArch64::STRQui : AArch64::LDRQui;
    }
    break;
  }
  return GenericOpc;
}

#ifndef NDEBUG
/// Helper function that verifies that we have a valid copy at the end of
/// selectCopy. Verifies that the source and dest have the expected sizes and
/// then returns true.
static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
                        const MachineRegisterInfo &MRI,
                        const TargetRegisterInfo &TRI,
                        const RegisterBankInfo &RBI) {
  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();
  const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Make sure the size of the source and dest line up.
  assert(
      (DstSize == SrcSize ||
       // Copies are a means to set up initial types, the number of
       // bits may not exactly match.
       (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
       // Copies are a means to copy bits around, as long as we are
       // on the same register class, that's fine. Otherwise, that
       // means we need some SUBREG_TO_REG or AND & co.
       (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
      "Copy with different width?!");

  // Check the size of the destination.
  assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
         "GPRs cannot get more than 64-bit width values");

  return true;
}
#endif

/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
/// to \p *To.
///
/// E.g. "To = COPY SrcReg:SubReg"
static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
                       const RegisterBankInfo &RBI, Register SrcReg,
                       const TargetRegisterClass *To, unsigned SubReg) {
  assert(SrcReg.isValid() && "Expected a valid source register?");
  assert(To && "Destination register class cannot be null");
  assert(SubReg && "Expected a valid subregister");

  MachineIRBuilder MIB(I);
  auto SubRegCopy =
      MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
  MachineOperand &RegOp = I.getOperand(1);
  RegOp.setReg(SubRegCopy.getReg(0));

  // It's possible that the destination register won't be constrained. Make
  // sure that happens.
  if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
    RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);

  return true;
}

/// Helper function to get the source and destination register classes for a
/// copy. Returns a std::pair containing the source register class for the
/// copy, and the destination register class for the copy. If a register class
/// cannot be determined, then it will be nullptr.
static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
                     MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                     const RegisterBankInfo &RBI) {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
  unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Special casing for cross-bank copies of s1s. We can technically represent
  // a 1-bit value with any size of register. The minimum size for a GPR is 32
  // bits. So, we need to put the FPR on 32 bits as well.
  //
  // FIXME: I'm not sure if this case holds true outside of copies. If it does,
  // then we can pull it into the helpers that get the appropriate class for a
  // register bank. Or make a new helper that carries along some constraint
  // information.
  if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
    SrcSize = DstSize = 32;

  return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
          getMinClassForRegBank(DstRegBank, DstSize, true)};
}

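/// Select a copy-like instruction (a COPY, or a cast that reduces to a copy):
/// pick register classes for both sides and insert any subregister copy or
/// SUBREG_TO_REG needed when the source and destination sizes differ.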
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
                       MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                       const RegisterBankInfo &RBI) {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);

  // Find the correct register classes for the source and destination registers.
  const TargetRegisterClass *SrcRC;
  const TargetRegisterClass *DstRC;
  std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);

  if (!DstRC) {
    LLVM_DEBUG(dbgs() << "Unexpected dest size "
                      << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
    return false;
  }

  // A couple helpers below, for making sure that the copy we produce is valid.

  // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
  // to verify that the src and dst are the same size, since that's handled by
  // the SUBREG_TO_REG.
  bool KnownValid = false;

  // Returns true, or asserts if something we don't expect happens. Instead of
  // returning true, we return isValidCopy() to ensure that we verify the
  // result.
  auto CheckCopy = [&]() {
    // If we have a bitcast or something, we can't have physical registers.
    assert((I.isCopy() ||
            (!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
             !Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
           "No phys reg on generic operator!");
    bool ValidCopy = true;
#ifndef NDEBUG
    ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI);
    assert(ValidCopy && "Invalid copy.");
#endif
    (void)KnownValid;
    return ValidCopy;
  };

  // Is this a copy? If so, then we may need to insert a subregister copy.
  if (I.isCopy()) {
    // Yes. Check if there's anything to fix up.
    if (!SrcRC) {
      LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
      return false;
    }

    unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
    unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
    unsigned SubReg;

    // If the source bank doesn't support a subregister copy small enough,
    // then we first need to copy to the destination bank.
    if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
      const TargetRegisterClass *DstTempRC =
          getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
      getSubRegForClass(DstRC, TRI, SubReg);

      MachineIRBuilder MIB(I);
      auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
      copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
    } else if (SrcSize > DstSize) {
      // If the source register is bigger than the destination we need to
      // perform a subregister copy.
      const TargetRegisterClass *SubRegRC =
          getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
      getSubRegForClass(SubRegRC, TRI, SubReg);
      copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
    } else if (DstSize > SrcSize) {
      // If the destination register is bigger than the source we need to do
      // a promotion using SUBREG_TO_REG.
      const TargetRegisterClass *PromotionRC =
          getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
      getSubRegForClass(SrcRC, TRI, SubReg);

      Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
      BuildMI(*I.getParent(), I, I.getDebugLoc(),
              TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
          .addImm(0)
          .addUse(SrcReg)
          .addImm(SubReg);
      MachineOperand &RegOp = I.getOperand(1);
      RegOp.setReg(PromoteReg);

      // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
      KnownValid = true;
    }

    // If the destination is a physical register, then there's nothing to
    // change, so we're done.
    if (Register::isPhysicalRegister(DstReg))
      return CheckCopy();
  }

  // No need to constrain SrcReg. It will get constrained when we hit another
  // of its uses or its defs. Copies do not have constraints.
  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }

  // If this is a GPR ZEXT, we want to just reduce it down into a copy.
  // The sizes will be mismatched with the source < 32b but that's ok.
  if (I.getOpcode() == TargetOpcode::G_ZEXT) {
    I.setDesc(TII.get(AArch64::COPY));
    assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
    return selectCopy(I, TII, MRI, TRI, RBI);
  }

  I.setDesc(TII.get(AArch64::COPY));
  return CheckCopy();
}

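/// Select the AArch64 opcode for a scalar FP<->integer conversion
/// (G_SITOFP/G_UITOFP/G_FPTOSI/G_FPTOUI) given the destination and source
/// types. Returns \p GenericOpc for unhandled combinations.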
static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
  if (!DstTy.isScalar() || !SrcTy.isScalar())
    return GenericOpc;

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();

  switch (DstSize) {
  case 32:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  case 64:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  default:
    return GenericOpc;
  }
  return GenericOpc;
}

MachineInstr *
AArch64InstructionSelector::emitSelect(Register Dst, Register True,
                                       Register False, AArch64CC::CondCode CC,
                                       MachineIRBuilder &MIB) const {
  MachineRegisterInfo &MRI = *MIB.getMRI();
  assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
             RBI.getRegBank(True, MRI, TRI)->getID() &&
         "Expected both select operands to have the same regbank?");
  LLT Ty = MRI.getType(True);
  if (Ty.isVector())
    return nullptr;
  const unsigned Size = Ty.getSizeInBits();
  assert((Size == 32 || Size == 64) &&
         "Expected 32 bit or 64 bit select only?");
  const bool Is32Bit = Size == 32;
  if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
    unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
    auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
    constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
    return &*FCSel;
  }

  // By default, we'll try and emit a CSEL.
  unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
  bool Optimized = false;
  auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
                                 &Optimized](Register &Reg, Register &OtherReg,
                                             bool Invert) {
    if (Optimized)
      return false;

    // Attempt to fold:
    //
    // %sub = G_SUB 0, %x
    // %select = G_SELECT cc, %reg, %sub
    //
    // Into:
    // %select = CSNEG %reg, %x, cc
    Register MatchReg;
    if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
      Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
      Reg = MatchReg;
      if (Invert) {
        CC = AArch64CC::getInvertedCondCode(CC);
        std::swap(Reg, OtherReg);
      }
      return true;
    }

    // Attempt to fold:
    //
    // %xor = G_XOR %x, -1
    // %select = G_SELECT cc, %reg, %xor
    //
    // Into:
    // %select = CSINV %reg, %x, cc
    if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
      Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
      Reg = MatchReg;
      if (Invert) {
        CC = AArch64CC::getInvertedCondCode(CC);
        std::swap(Reg, OtherReg);
      }
      return true;
    }

    // Attempt to fold:
    //
    // %add = G_ADD %x, 1
    // %select = G_SELECT cc, %reg, %add
    //
    // Into:
    // %select = CSINC %reg, %x, cc
    if (mi_match(Reg, MRI,
                 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
                          m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
      Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
      Reg = MatchReg;
      if (Invert) {
        CC = AArch64CC::getInvertedCondCode(CC);
        std::swap(Reg, OtherReg);
      }
      return true;
    }

    return false;
  };

  // Helper lambda which tries to use CSINC/CSINV for the instruction when its
  // true/false values are constants.
  // FIXME: All of these patterns already exist in tablegen. We should be
  // able to import these.
  auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
                          &Optimized]() {
    if (Optimized)
      return false;
    auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
    auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
    if (!TrueCst && !FalseCst)
      return false;

    Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
    if (TrueCst && FalseCst) {
      int64_t T = TrueCst->Value.getSExtValue();
      int64_t F = FalseCst->Value.getSExtValue();

      if (T == 0 && F == 1) {
        // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
        True = ZReg;
        False = ZReg;
        return true;
      }

      if (T == 0 && F == -1) {
        // G_SELECT cc, 0, -1 -> CSINV zreg, zreg, cc
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
        True = ZReg;
        False = ZReg;
        return true;
      }
    }

    if (TrueCst) {
      int64_t T = TrueCst->Value.getSExtValue();
      if (T == 1) {
        // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
        True = False;
        False = ZReg;
        CC = AArch64CC::getInvertedCondCode(CC);
        return true;
      }

      if (T == -1) {
        // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
        True = False;
        False = ZReg;
        CC = AArch64CC::getInvertedCondCode(CC);
        return true;
      }
    }

    if (FalseCst) {
      int64_t F = FalseCst->Value.getSExtValue();
      if (F == 1) {
        // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
        False = ZReg;
        return true;
      }

      if (F == -1) {
        // G_SELECT cc, t, -1 -> CSINV t, zreg, cc
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
        False = ZReg;
        return true;
      }
    }
    return false;
  };

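  // Try folding into the false operand first; folding into the true operand
  // instead requires inverting the condition code (hence Invert = true below).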
  Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
  Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
  Optimized |= TryOptSelectCst();
  auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
  constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
  return &*SelectInst;
}

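/// Map an IR integer-compare predicate onto the corresponding AArch64
/// condition code.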
static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return AArch64CC::NE;
  case CmpInst::ICMP_EQ:
    return AArch64CC::EQ;
  case CmpInst::ICMP_SGT:
    return AArch64CC::GT;
  case CmpInst::ICMP_SGE:
    return AArch64CC::GE;
  case CmpInst::ICMP_SLT:
    return AArch64CC::LT;
  case CmpInst::ICMP_SLE:
    return AArch64CC::LE;
  case CmpInst::ICMP_UGT:
    return AArch64CC::HI;
  case CmpInst::ICMP_UGE:
    return AArch64CC::HS;
  case CmpInst::ICMP_ULT:
    return AArch64CC::LO;
  case CmpInst::ICMP_ULE:
    return AArch64CC::LS;
  }
}

/// Return a register which can be used as a bit to test in a TB(N)Z.
static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
                              MachineRegisterInfo &MRI) {
  assert(Reg.isValid() && "Expected valid register!");
  bool HasZext = false;
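  // Walk up the chain of single-use defining instructions, looking for
  // operations we can fold away by adjusting the bit number (shifts), the
  // polarity of the test (xor), or by dropping them entirely (extensions,
  // truncations, masking ANDs).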
  while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
    unsigned Opc = MI->getOpcode();

    if (!MI->getOperand(0).isReg() ||
        !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
      break;

    // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
    //
    // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
    // on the truncated x is the same as the bit number on x.
    if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
        Opc == TargetOpcode::G_TRUNC) {
      if (Opc == TargetOpcode::G_ZEXT)
        HasZext = true;

      Register NextReg = MI->getOperand(1).getReg();
      // Did we find something worth folding?
      if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
        break;

      // NextReg is worth folding. Keep looking.
      Reg = NextReg;
      continue;
    }

    // Attempt to find a suitable operation with a constant on one side.
    Optional<uint64_t> C;
    Register TestReg;
    switch (Opc) {
    default:
      break;
    case TargetOpcode::G_AND:
    case TargetOpcode::G_XOR: {
      TestReg = MI->getOperand(1).getReg();
      Register ConstantReg = MI->getOperand(2).getReg();
      auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
      if (!VRegAndVal) {
        // AND commutes, check the other side for a constant.
        // FIXME: Can we canonicalize the constant so that it's always on the
        // same side at some point earlier?
        std::swap(ConstantReg, TestReg);
        VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
      }
      if (VRegAndVal) {
        if (HasZext)
          C = VRegAndVal->Value.getZExtValue();
        else
          C = VRegAndVal->Value.getSExtValue();
      }
      break;
    }
    case TargetOpcode::G_ASHR:
    case TargetOpcode::G_LSHR:
    case TargetOpcode::G_SHL: {
      TestReg = MI->getOperand(1).getReg();
      auto VRegAndVal =
          getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
      if (VRegAndVal)
        C = VRegAndVal->Value.getSExtValue();
      break;
    }
    }

    // Didn't find a constant or viable register. Bail out of the loop.
    if (!C || !TestReg.isValid())
      break;

    // We found a suitable instruction with a constant. Check to see if we can
    // walk through the instruction.
    Register NextReg;
    unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
    switch (Opc) {
    default:
      break;
    case TargetOpcode::G_AND:
      // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
      if ((*C >> Bit) & 1)
        NextReg = TestReg;
      break;
    case TargetOpcode::G_SHL:
      // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
      // the type of the register.
      if (*C <= Bit && (Bit - *C) < TestRegSize) {
        NextReg = TestReg;
        Bit = Bit - *C;
      }
      break;
    case TargetOpcode::G_ASHR:
      // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
      // in x
      NextReg = TestReg;
      Bit = Bit + *C;
      if (Bit >= TestRegSize)
        Bit = TestRegSize - 1;
      break;
    case TargetOpcode::G_LSHR:
      // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
      if ((Bit + *C) < TestRegSize) {
        NextReg = TestReg;
        Bit = Bit + *C;
      }
      break;
    case TargetOpcode::G_XOR:
      // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
      // appropriate.
      //
      // e.g. If x' = xor x, c, and the b-th bit is set in c then
      //
      // tbz x', b -> tbnz x, b
      //
      // Because x' only has the b-th bit set if x does not.
      if ((*C >> Bit) & 1)
        Invert = !Invert;
      NextReg = TestReg;
      break;
    }

    // Check if we found anything worth folding.
    if (!NextReg.isValid())
      return Reg;
    Reg = NextReg;
  }

  return Reg;
}
1447
emitTestBit(Register TestReg,uint64_t Bit,bool IsNegative,MachineBasicBlock * DstMBB,MachineIRBuilder & MIB) const1448 MachineInstr *AArch64InstructionSelector::emitTestBit(
1449 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1450 MachineIRBuilder &MIB) const {
1451 assert(TestReg.isValid());
1452 assert(ProduceNonFlagSettingCondBr &&
1453 "Cannot emit TB(N)Z with speculation tracking!");
1454 MachineRegisterInfo &MRI = *MIB.getMRI();
1455
1456 // Attempt to optimize the test bit by walking over instructions.
1457 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1458 LLT Ty = MRI.getType(TestReg);
1459 unsigned Size = Ty.getSizeInBits();
1460 assert(!Ty.isVector() && "Expected a scalar!");
1461 assert(Bit < 64 && "Bit is too large!");
1462
1463 // When the test register is a 64-bit register, we have to narrow to make
1464 // TBNZW work.
1465 bool UseWReg = Bit < 32;
1466 unsigned NecessarySize = UseWReg ? 32 : 64;
1467 if (Size != NecessarySize)
1468 TestReg = moveScalarRegClass(
1469 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1470 MIB);
1471
1472 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1473 {AArch64::TBZW, AArch64::TBNZW}};
1474 unsigned Opc = OpcTable[UseWReg][IsNegative];
1475 auto TestBitMI =
1476 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1477 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1478 return &*TestBitMI;
1479 }
1480
1481 bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1482 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1483 MachineIRBuilder &MIB) const {
1484 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1485 // Given something like this:
1486 //
1487 // %x = ...Something...
1488 // %one = G_CONSTANT i64 1
1489 // %zero = G_CONSTANT i64 0
1490 // %and = G_AND %x, %one
1491 // %cmp = G_ICMP intpred(ne), %and, %zero
1492 // %cmp_trunc = G_TRUNC %cmp
1493 // G_BRCOND %cmp_trunc, %bb.3
1494 //
1495 // We want to try and fold the AND into the G_BRCOND and produce either a
1496 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1497 //
1498 // In this case, we'd get
1499 //
1500 // TBNZ %x %bb.3
1501 //
1502
1503 // Check if the AND has a constant on its RHS which we can use as a mask.
1504 // If it's a power of 2, then it's the same as checking a specific bit.
1505   // (e.g., ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1506 auto MaybeBit = getIConstantVRegValWithLookThrough(
1507 AndInst.getOperand(2).getReg(), *MIB.getMRI());
1508 if (!MaybeBit)
1509 return false;
1510
1511 int32_t Bit = MaybeBit->Value.exactLogBase2();
1512 if (Bit < 0)
1513 return false;
1514
1515 Register TestReg = AndInst.getOperand(1).getReg();
1516
1517 // Emit a TB(N)Z.
1518 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1519 return true;
1520 }
1521
1522 MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1523 bool IsNegative,
1524 MachineBasicBlock *DestMBB,
1525 MachineIRBuilder &MIB) const {
1526 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1527 MachineRegisterInfo &MRI = *MIB.getMRI();
1528 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1529 AArch64::GPRRegBankID &&
1530 "Expected GPRs only?");
1531 auto Ty = MRI.getType(CompareReg);
1532 unsigned Width = Ty.getSizeInBits();
1533 assert(!Ty.isVector() && "Expected scalar only?");
1534 assert(Width <= 64 && "Expected width to be at most 64?");
1535 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1536 {AArch64::CBNZW, AArch64::CBNZX}};
1537 unsigned Opc = OpcTable[IsNegative][Width == 64];
1538 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1539 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1540 return &*BranchMI;
1541 }
1542
1543 bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1544 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1545 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1546 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1547   // Unfortunately, the mapping of LLVM FP CCs onto AArch64 CCs isn't totally
1548   // clean. Some of them require two branches to implement.
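  // e.g. (illustrative; the authoritative mapping lives in
  // changeFCMPPredToAArch64CC) FCMP_ONE lowers to "b.mi; b.gt", and FCMP_UEQ
  // lowers to "b.eq; b.vs".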
1549 auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1550 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1551 Pred);
1552 AArch64CC::CondCode CC1, CC2;
1553 changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
1554 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1555 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1556 if (CC2 != AArch64CC::AL)
1557 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1558 I.eraseFromParent();
1559 return true;
1560 }
1561
1562 bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1563 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1564 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1565 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1566 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1567 //
1568 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1569 // instructions will not be produced, as they are conditional branch
1570 // instructions that do not set flags.
1571 if (!ProduceNonFlagSettingCondBr)
1572 return false;
1573
1574 MachineRegisterInfo &MRI = *MIB.getMRI();
1575 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1576 auto Pred =
1577 static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1578 Register LHS = ICmp.getOperand(2).getReg();
1579 Register RHS = ICmp.getOperand(3).getReg();
1580
1581 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1582 auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1583 MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1584
1585 // When we can emit a TB(N)Z, prefer that.
1586 //
1587 // Handle non-commutative condition codes first.
1588 // Note that we don't want to do this when we have a G_AND because it can
1589 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1590 if (VRegAndVal && !AndInst) {
1591 int64_t C = VRegAndVal->Value.getSExtValue();
1592
1593 // When we have a greater-than comparison, we can just test if the msb is
1594 // zero.
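    // e.g. (icmp sgt x, -1) is true iff x >= 0, i.e. iff the sign bit (msb)
    // of x is clear, so a single (tbz x, msb) suffices.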
1595 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1596 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1597 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1598 I.eraseFromParent();
1599 return true;
1600 }
1601
1602 // When we have a less than comparison, we can just test if the msb is not
1603 // zero.
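    // e.g. (icmp slt x, 0) is true iff the sign bit of x is set, so a single
    // (tbnz x, msb) suffices.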
1604 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1605 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1606 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1607 I.eraseFromParent();
1608 return true;
1609 }
1610 }
1611
1612 // Attempt to handle commutative condition codes. Right now, that's only
1613 // eq/ne.
1614 if (ICmpInst::isEquality(Pred)) {
1615 if (!VRegAndVal) {
1616 std::swap(RHS, LHS);
1617 VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1618 AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1619 }
1620
1621 if (VRegAndVal && VRegAndVal->Value == 0) {
1622 // If there's a G_AND feeding into this branch, try to fold it away by
1623 // emitting a TB(N)Z instead.
1624 //
1625 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1626 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1627 // would be redundant.
1628 if (AndInst &&
1629 tryOptAndIntoCompareBranch(
1630 *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1631 I.eraseFromParent();
1632 return true;
1633 }
1634
1635 // Otherwise, try to emit a CB(N)Z instead.
1636 auto LHSTy = MRI.getType(LHS);
1637 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1638 emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1639 I.eraseFromParent();
1640 return true;
1641 }
1642 }
1643 }
1644
1645 return false;
1646 }
1647
1648 bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1649 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1650 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1651 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1652 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1653 return true;
1654
1655 // Couldn't optimize. Emit a compare + a Bcc.
1656 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1657 auto PredOp = ICmp.getOperand(1);
1658 emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1659 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1660 static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
1661 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1662 I.eraseFromParent();
1663 return true;
1664 }
1665
1666 bool AArch64InstructionSelector::selectCompareBranch(
1667 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
1668 Register CondReg = I.getOperand(0).getReg();
1669 MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1670 if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) {
1671 CondReg = CCMI->getOperand(1).getReg();
1672 CCMI = MRI.getVRegDef(CondReg);
1673 }
1674
1675 // Try to select the G_BRCOND using whatever is feeding the condition if
1676 // possible.
1677 unsigned CCMIOpc = CCMI->getOpcode();
1678 if (CCMIOpc == TargetOpcode::G_FCMP)
1679 return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1680 if (CCMIOpc == TargetOpcode::G_ICMP)
1681 return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1682
1683 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1684 // instructions will not be produced, as they are conditional branch
1685 // instructions that do not set flags.
1686 if (ProduceNonFlagSettingCondBr) {
1687 emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1688 I.getOperand(1).getMBB(), MIB);
1689 I.eraseFromParent();
1690 return true;
1691 }
1692
1693 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1694 auto TstMI =
1695 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1696 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1697 auto Bcc = MIB.buildInstr(AArch64::Bcc)
1698 .addImm(AArch64CC::EQ)
1699 .addMBB(I.getOperand(1).getMBB());
1700 I.eraseFromParent();
1701 return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1702 }
1703
1704 /// Returns the element immediate value of a vector shift operand if found.
1705 /// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
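/// e.g. (illustrative) a G_BUILD_VECTOR that splats "G_CONSTANT i32 3" into
/// every lane yields the immediate 3.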
1706 static Optional<int64_t> getVectorShiftImm(Register Reg,
1707 MachineRegisterInfo &MRI) {
1708 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1709 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1710 assert(OpMI && "Expected to find a vreg def for vector shift operand");
1711 return getAArch64VectorSplatScalar(*OpMI, MRI);
1712 }
1713
1714 /// Matches and returns the shift immediate value for a SHL instruction given
1715 /// a shift operand.
1716 static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) {
1717 Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1718 if (!ShiftImm)
1719 return None;
1720 // Check the immediate is in range for a SHL.
1721 int64_t Imm = *ShiftImm;
1722 if (Imm < 0)
1723 return None;
1724 switch (SrcTy.getElementType().getSizeInBits()) {
1725 default:
1726 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1727 return None;
1728 case 8:
1729 if (Imm > 7)
1730 return None;
1731 break;
1732 case 16:
1733 if (Imm > 15)
1734 return None;
1735 break;
1736 case 32:
1737 if (Imm > 31)
1738 return None;
1739 break;
1740 case 64:
1741 if (Imm > 63)
1742 return None;
1743 break;
1744 }
1745 return Imm;
1746 }
1747
1748 bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1749 MachineRegisterInfo &MRI) {
1750 assert(I.getOpcode() == TargetOpcode::G_SHL);
1751 Register DstReg = I.getOperand(0).getReg();
1752 const LLT Ty = MRI.getType(DstReg);
1753 Register Src1Reg = I.getOperand(1).getReg();
1754 Register Src2Reg = I.getOperand(2).getReg();
1755
1756 if (!Ty.isVector())
1757 return false;
1758
1759 // Check if we have a vector of constants on RHS that we can select as the
1760 // immediate form.
1761 Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1762
1763 unsigned Opc = 0;
1764 if (Ty == LLT::fixed_vector(2, 64)) {
1765 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1766 } else if (Ty == LLT::fixed_vector(4, 32)) {
1767 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1768 } else if (Ty == LLT::fixed_vector(2, 32)) {
1769 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1770 } else if (Ty == LLT::fixed_vector(4, 16)) {
1771 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1772 } else if (Ty == LLT::fixed_vector(8, 16)) {
1773 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1774 } else if (Ty == LLT::fixed_vector(16, 8)) {
1775 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1776 } else if (Ty == LLT::fixed_vector(8, 8)) {
1777 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1778 } else {
1779 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1780 return false;
1781 }
1782
1783 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1784 if (ImmVal)
1785 Shl.addImm(*ImmVal);
1786 else
1787 Shl.addUse(Src2Reg);
1788 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1789 I.eraseFromParent();
1790 return true;
1791 }
1792
1793 bool AArch64InstructionSelector::selectVectorAshrLshr(
1794 MachineInstr &I, MachineRegisterInfo &MRI) {
1795 assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1796 I.getOpcode() == TargetOpcode::G_LSHR);
1797 Register DstReg = I.getOperand(0).getReg();
1798 const LLT Ty = MRI.getType(DstReg);
1799 Register Src1Reg = I.getOperand(1).getReg();
1800 Register Src2Reg = I.getOperand(2).getReg();
1801
1802 if (!Ty.isVector())
1803 return false;
1804
1805 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1806
1807 // We expect the immediate case to be lowered in the PostLegalCombiner to
1808 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1809
1810   // There is no shift-right-by-register instruction, but the
1811   // shift-left-by-register instruction takes a signed shift amount, where
1812   // negative values specify a right shift.
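  // e.g. (illustrative) to select %v = G_ASHR %x, %amt we emit a NEG of %amt
  // followed by an SSHL; a lane amount of -3 shifts that lane right by 3.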
1813
1814 unsigned Opc = 0;
1815 unsigned NegOpc = 0;
1816 const TargetRegisterClass *RC =
1817 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI);
1818 if (Ty == LLT::fixed_vector(2, 64)) {
1819 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1820 NegOpc = AArch64::NEGv2i64;
1821 } else if (Ty == LLT::fixed_vector(4, 32)) {
1822 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1823 NegOpc = AArch64::NEGv4i32;
1824 } else if (Ty == LLT::fixed_vector(2, 32)) {
1825 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1826 NegOpc = AArch64::NEGv2i32;
1827 } else if (Ty == LLT::fixed_vector(4, 16)) {
1828 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1829 NegOpc = AArch64::NEGv4i16;
1830 } else if (Ty == LLT::fixed_vector(8, 16)) {
1831 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1832 NegOpc = AArch64::NEGv8i16;
1833 } else if (Ty == LLT::fixed_vector(16, 8)) {
1834 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1835 NegOpc = AArch64::NEGv16i8;
1836 } else if (Ty == LLT::fixed_vector(8, 8)) {
1837 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1838 NegOpc = AArch64::NEGv8i8;
1839 } else {
1840 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1841 return false;
1842 }
1843
1844 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1845 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1846 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1847 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1848 I.eraseFromParent();
1849 return true;
1850 }
1851
1852 bool AArch64InstructionSelector::selectVaStartAAPCS(
1853 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1854 return false;
1855 }
1856
1857 bool AArch64InstructionSelector::selectVaStartDarwin(
1858 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1859 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1860 Register ListReg = I.getOperand(0).getReg();
1861
1862 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1863
1864 auto MIB =
1865 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
1866 .addDef(ArgsAddrReg)
1867 .addFrameIndex(FuncInfo->getVarArgsStackIndex())
1868 .addImm(0)
1869 .addImm(0);
1870
1871 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1872
1873 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
1874 .addUse(ArgsAddrReg)
1875 .addUse(ListReg)
1876 .addImm(0)
1877 .addMemOperand(*I.memoperands_begin());
1878
1879 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1880 I.eraseFromParent();
1881 return true;
1882 }
1883
1884 void AArch64InstructionSelector::materializeLargeCMVal(
1885 MachineInstr &I, const Value *V, unsigned OpFlags) {
1886 MachineBasicBlock &MBB = *I.getParent();
1887 MachineFunction &MF = *MBB.getParent();
1888 MachineRegisterInfo &MRI = MF.getRegInfo();
1889
1890 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
1891 MovZ->addOperand(MF, I.getOperand(1));
1892 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
1893 AArch64II::MO_NC);
1894 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
1895 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
1896
1897 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
1898 Register ForceDstReg) {
1899 Register DstReg = ForceDstReg
1900 ? ForceDstReg
1901 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1902 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
1903 if (auto *GV = dyn_cast<GlobalValue>(V)) {
1904 MovI->addOperand(MF, MachineOperand::CreateGA(
1905 GV, MovZ->getOperand(1).getOffset(), Flags));
1906 } else {
1907 MovI->addOperand(
1908 MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
1909 MovZ->getOperand(1).getOffset(), Flags));
1910 }
1911 MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
1912 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
1913 return DstReg;
1914 };
1915 Register DstReg = BuildMovK(MovZ.getReg(0),
1916 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
1917 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
1918 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
1919 }
1920
1921 bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
1922 MachineBasicBlock &MBB = *I.getParent();
1923 MachineFunction &MF = *MBB.getParent();
1924 MachineRegisterInfo &MRI = MF.getRegInfo();
1925
1926 switch (I.getOpcode()) {
1927 case TargetOpcode::G_SHL:
1928 case TargetOpcode::G_ASHR:
1929 case TargetOpcode::G_LSHR: {
1930     // These shifts are legalized to have 64-bit shift amounts because we want
1931 // to take advantage of the existing imported selection patterns that assume
1932 // the immediates are s64s. However, if the shifted type is 32 bits and for
1933 // some reason we receive input GMIR that has an s64 shift amount that's not
1934 // a G_CONSTANT, insert a truncate so that we can still select the s32
1935 // register-register variant.
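    // A sketch of the rewrite (illustrative register names):
    //   %v:(s32) = G_SHL %x:(s32), %amt:(s64)
    // becomes
    //   %trunc:(s32) = COPY %amt.sub_32
    //   %v:(s32) = G_SHL %x, %trunc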
1936 Register SrcReg = I.getOperand(1).getReg();
1937 Register ShiftReg = I.getOperand(2).getReg();
1938 const LLT ShiftTy = MRI.getType(ShiftReg);
1939 const LLT SrcTy = MRI.getType(SrcReg);
1940 if (SrcTy.isVector())
1941 return false;
1942 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
1943 if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
1944 return false;
1945 auto *AmtMI = MRI.getVRegDef(ShiftReg);
1946 assert(AmtMI && "could not find a vreg definition for shift amount");
1947 if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
1948 // Insert a subregister copy to implement a 64->32 trunc
1949 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
1950 .addReg(ShiftReg, 0, AArch64::sub_32);
1951 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1952 I.getOperand(2).setReg(Trunc.getReg(0));
1953 }
1954 return true;
1955 }
1956 case TargetOpcode::G_STORE: {
1957 bool Changed = contractCrossBankCopyIntoStore(I, MRI);
1958 MachineOperand &SrcOp = I.getOperand(0);
1959 if (MRI.getType(SrcOp.getReg()).isPointer()) {
1960 // Allow matching with imported patterns for stores of pointers. Unlike
1961 // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
1962 // and constrain.
1963 auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
1964 Register NewSrc = Copy.getReg(0);
1965 SrcOp.setReg(NewSrc);
1966 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
1967 Changed = true;
1968 }
1969 return Changed;
1970 }
1971 case TargetOpcode::G_PTR_ADD:
1972 return convertPtrAddToAdd(I, MRI);
1973 case TargetOpcode::G_LOAD: {
1974 // For scalar loads of pointers, we try to convert the dest type from p0
1975 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
1976 // conversion, this should be ok because all users should have been
1977 // selected already, so the type doesn't matter for them.
1978 Register DstReg = I.getOperand(0).getReg();
1979 const LLT DstTy = MRI.getType(DstReg);
1980 if (!DstTy.isPointer())
1981 return false;
1982 MRI.setType(DstReg, LLT::scalar(64));
1983 return true;
1984 }
1985 case AArch64::G_DUP: {
1986 // Convert the type from p0 to s64 to help selection.
1987 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1988 if (!DstTy.getElementType().isPointer())
1989 return false;
1990 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
1991 MRI.setType(I.getOperand(0).getReg(),
1992 DstTy.changeElementType(LLT::scalar(64)));
1993 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
1994 I.getOperand(1).setReg(NewSrc.getReg(0));
1995 return true;
1996 }
1997 case TargetOpcode::G_UITOFP:
1998 case TargetOpcode::G_SITOFP: {
1999     // If both source and destination regbanks are FPR, then convert the
2000     // opcode to G_SITOF/G_UITOF so that the importer can select it to an fpr
2001     // variant. Otherwise, it ends up matching an fpr/gpr variant and adding a
2002     // cross-bank copy.
2003 Register SrcReg = I.getOperand(1).getReg();
2004 LLT SrcTy = MRI.getType(SrcReg);
2005 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2006 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
2007 return false;
2008
2009 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
2010 if (I.getOpcode() == TargetOpcode::G_SITOFP)
2011 I.setDesc(TII.get(AArch64::G_SITOF));
2012 else
2013 I.setDesc(TII.get(AArch64::G_UITOF));
2014 return true;
2015 }
2016 return false;
2017 }
2018 default:
2019 return false;
2020 }
2021 }
2022
2023 /// This lowering tries to look for G_PTR_ADD instructions and then converts
2024 /// them to a standard G_ADD with a G_PTRTOINT on the source register.
2025 ///
2026 /// The motivation behind this is to expose the add semantics to the imported
2027 /// tablegen patterns. We shouldn't need to check for uses being loads/stores,
2028 /// because the selector works bottom up, uses before defs. By the time we
2029 /// end up trying to select a G_PTR_ADD, we should have already attempted to
2030 /// fold it into addressing modes; reaching here means those attempts failed.
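///
/// A sketch of the rewrite (illustrative virtual register names):
///
///   %dst:(p0) = G_PTR_ADD %base:(p0), %off:(s64)
///     -->
///   %intbase:(s64) = G_PTRTOINT %base:(p0)
///   %dst:(s64) = G_ADD %intbase, %off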
2031 bool AArch64InstructionSelector::convertPtrAddToAdd(
2032 MachineInstr &I, MachineRegisterInfo &MRI) {
2033 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
2034 Register DstReg = I.getOperand(0).getReg();
2035 Register AddOp1Reg = I.getOperand(1).getReg();
2036 const LLT PtrTy = MRI.getType(DstReg);
2037 if (PtrTy.getAddressSpace() != 0)
2038 return false;
2039
2040 const LLT CastPtrTy =
2041 PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
2042 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
2043 // Set regbanks on the registers.
2044 if (PtrTy.isVector())
2045 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2046 else
2047 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2048
2049 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2050 // %dst(intty) = G_ADD %intbase, off
2051 I.setDesc(TII.get(TargetOpcode::G_ADD));
2052 MRI.setType(DstReg, CastPtrTy);
2053 I.getOperand(1).setReg(PtrToInt.getReg(0));
2054 if (!select(*PtrToInt)) {
2055 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2056 return false;
2057 }
2058
2059 // Also take the opportunity here to try to do some optimization.
2060 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2061 Register NegatedReg;
2062 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2063 return true;
2064 I.getOperand(2).setReg(NegatedReg);
2065 I.setDesc(TII.get(TargetOpcode::G_SUB));
2066 return true;
2067 }
2068
2069 bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
2070 MachineRegisterInfo &MRI) {
2071 // We try to match the immediate variant of LSL, which is actually an alias
2072 // for a special case of UBFM. Otherwise, we fall back to the imported
2073 // selector which will match the register variant.
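  // e.g. (illustrative encoding) "lsl w0, w1, #3" is an alias for
  // "ubfm w0, w1, #29, #28": immr = 32 - shift, imms = 31 - shift.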
2074 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2075 const auto &MO = I.getOperand(2);
2076 auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
2077 if (!VRegAndVal)
2078 return false;
2079
2080 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2081 if (DstTy.isVector())
2082 return false;
2083 bool Is64Bit = DstTy.getSizeInBits() == 64;
2084 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2085 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2086
2087 if (!Imm1Fn || !Imm2Fn)
2088 return false;
2089
2090 auto NewI =
2091 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2092 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2093
2094 for (auto &RenderFn : *Imm1Fn)
2095 RenderFn(NewI);
2096 for (auto &RenderFn : *Imm2Fn)
2097 RenderFn(NewI);
2098
2099 I.eraseFromParent();
2100 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2101 }
2102
2103 bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2104 MachineInstr &I, MachineRegisterInfo &MRI) {
2105 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2106 // If we're storing a scalar, it doesn't matter what register bank that
2107 // scalar is on. All that matters is the size.
2108 //
2109 // So, if we see something like this (with a 32-bit scalar as an example):
2110 //
2111 // %x:gpr(s32) = ... something ...
2112 // %y:fpr(s32) = COPY %x:gpr(s32)
2113 // G_STORE %y:fpr(s32)
2114 //
2115 // We can fix this up into something like this:
2116 //
2117 // G_STORE %x:gpr(s32)
2118 //
2119 // And then continue the selection process normally.
2120 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2121 if (!DefDstReg.isValid())
2122 return false;
2123 LLT DefDstTy = MRI.getType(DefDstReg);
2124 Register StoreSrcReg = I.getOperand(0).getReg();
2125 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2126
2127 // If we get something strange like a physical register, then we shouldn't
2128 // go any further.
2129 if (!DefDstTy.isValid())
2130 return false;
2131
2132 // Are the source and dst types the same size?
2133 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2134 return false;
2135
2136 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2137 RBI.getRegBank(DefDstReg, MRI, TRI))
2138 return false;
2139
2140 // We have a cross-bank copy, which is entering a store. Let's fold it.
2141 I.getOperand(0).setReg(DefDstReg);
2142 return true;
2143 }
2144
2145 bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2146 assert(I.getParent() && "Instruction should be in a basic block!");
2147 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2148
2149 MachineBasicBlock &MBB = *I.getParent();
2150 MachineFunction &MF = *MBB.getParent();
2151 MachineRegisterInfo &MRI = MF.getRegInfo();
2152
2153 switch (I.getOpcode()) {
2154 case AArch64::G_DUP: {
2155 // Before selecting a DUP instruction, check if it is better selected as a
2156 // MOV or load from a constant pool.
2157 Register Src = I.getOperand(1).getReg();
2158 auto ValAndVReg = getIConstantVRegValWithLookThrough(Src, MRI);
2159 if (!ValAndVReg)
2160 return false;
2161 LLVMContext &Ctx = MF.getFunction().getContext();
2162 Register Dst = I.getOperand(0).getReg();
2163 auto *CV = ConstantDataVector::getSplat(
2164 MRI.getType(Dst).getNumElements(),
2165 ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()),
2166 ValAndVReg->Value));
2167 if (!emitConstantVector(Dst, CV, MIB, MRI))
2168 return false;
2169 I.eraseFromParent();
2170 return true;
2171 }
2172 case TargetOpcode::G_SEXT:
2173 // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2174 // over a normal extend.
2175 if (selectUSMovFromExtend(I, MRI))
2176 return true;
2177 return false;
2178 case TargetOpcode::G_BR:
2179 return false;
2180 case TargetOpcode::G_SHL:
2181 return earlySelectSHL(I, MRI);
2182 case TargetOpcode::G_CONSTANT: {
2183 bool IsZero = false;
2184 if (I.getOperand(1).isCImm())
2185 IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
2186 else if (I.getOperand(1).isImm())
2187 IsZero = I.getOperand(1).getImm() == 0;
2188
2189 if (!IsZero)
2190 return false;
2191
2192 Register DefReg = I.getOperand(0).getReg();
2193 LLT Ty = MRI.getType(DefReg);
2194 if (Ty.getSizeInBits() == 64) {
2195 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2196 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2197 } else if (Ty.getSizeInBits() == 32) {
2198 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2199 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2200 } else
2201 return false;
2202
2203 I.setDesc(TII.get(TargetOpcode::COPY));
2204 return true;
2205 }
2206
2207 case TargetOpcode::G_ADD: {
2208 // Check if this is being fed by a G_ICMP on either side.
2209 //
2210 // (cmp pred, x, y) + z
2211 //
2212 // In the above case, when the cmp is true, we increment z by 1. So, we can
2213 // fold the add into the cset for the cmp by using cinc.
2214 //
2215 // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
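    // Sketch of the intended selection (illustrative registers; the actual
    // sequence comes from emitIntegerCompare/emitCSetForICMP):
    //   %add = G_ADD (G_ICMP intpred(eq), %a, %b), %z
    // roughly becomes "subs wzr, a, b" followed by "cinc add, z, eq".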
2216 Register X = I.getOperand(1).getReg();
2217
2218     // Only handle scalars. Scalar G_ICMP is only legal for s32, so bail out
2219     // early on anything that isn't a 32-bit scalar.
2220 LLT Ty = MRI.getType(X);
2221 if (Ty.isVector() || Ty.getSizeInBits() != 32)
2222 return false;
2223
2224 Register CmpReg = I.getOperand(2).getReg();
2225 MachineInstr *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, CmpReg, MRI);
2226 if (!Cmp) {
2227 std::swap(X, CmpReg);
2228 Cmp = getOpcodeDef(TargetOpcode::G_ICMP, CmpReg, MRI);
2229 if (!Cmp)
2230 return false;
2231 }
2232 auto Pred =
2233 static_cast<CmpInst::Predicate>(Cmp->getOperand(1).getPredicate());
2234 emitIntegerCompare(Cmp->getOperand(2), Cmp->getOperand(3),
2235 Cmp->getOperand(1), MIB);
2236 emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIB, X);
2237 I.eraseFromParent();
2238 return true;
2239 }
2240 case TargetOpcode::G_OR: {
2241 // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2242 // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2243 // shifting and masking that we can replace with a BFI (encoded as a BFM).
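    // e.g. (illustrative) with Size = 32, ShiftImm = 16, MaskImm = 0xffff:
    //   %dst = G_OR (G_SHL %x, 16), (G_AND %y, 0xffff)
    // selects to a BFM with immr = 16, imms = 15 (the "bfi #16, #16" alias),
    // inserting the low 16 bits of %x above the low 16 bits of %y.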
2244 Register Dst = I.getOperand(0).getReg();
2245 LLT Ty = MRI.getType(Dst);
2246
2247 if (!Ty.isScalar())
2248 return false;
2249
2250 unsigned Size = Ty.getSizeInBits();
2251 if (Size != 32 && Size != 64)
2252 return false;
2253
2254 Register ShiftSrc;
2255 int64_t ShiftImm;
2256 Register MaskSrc;
2257 int64_t MaskImm;
2258 if (!mi_match(
2259 Dst, MRI,
2260 m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
2261 m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
2262 return false;
2263
2264 if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2265 return false;
2266
2267 int64_t Immr = Size - ShiftImm;
2268 int64_t Imms = Size - ShiftImm - 1;
2269 unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2270 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2271 I.eraseFromParent();
2272 return true;
2273 }
2274 default:
2275 return false;
2276 }
2277 }
2278
2279 bool AArch64InstructionSelector::select(MachineInstr &I) {
2280 assert(I.getParent() && "Instruction should be in a basic block!");
2281 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2282
2283 MachineBasicBlock &MBB = *I.getParent();
2284 MachineFunction &MF = *MBB.getParent();
2285 MachineRegisterInfo &MRI = MF.getRegInfo();
2286
2287 const AArch64Subtarget *Subtarget =
2288 &static_cast<const AArch64Subtarget &>(MF.getSubtarget());
2289 if (Subtarget->requiresStrictAlign()) {
2290 // We don't support this feature yet.
2291 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2292 return false;
2293 }
2294
2295 MIB.setInstrAndDebugLoc(I);
2296
2297 unsigned Opcode = I.getOpcode();
2298   // G_PHI requires the same handling as PHI
2299 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2300 // Certain non-generic instructions also need some special handling.
2301
2302 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2303 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2304
2305 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2306 const Register DefReg = I.getOperand(0).getReg();
2307 const LLT DefTy = MRI.getType(DefReg);
2308
2309 const RegClassOrRegBank &RegClassOrBank =
2310 MRI.getRegClassOrRegBank(DefReg);
2311
2312 const TargetRegisterClass *DefRC
2313 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
2314 if (!DefRC) {
2315 if (!DefTy.isValid()) {
2316 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2317 return false;
2318 }
2319 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
2320 DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
2321 if (!DefRC) {
2322 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2323 return false;
2324 }
2325 }
2326
2327 I.setDesc(TII.get(TargetOpcode::PHI));
2328
2329 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2330 }
2331
2332 if (I.isCopy())
2333 return selectCopy(I, TII, MRI, TRI, RBI);
2334
2335 return true;
2336 }
2337
2339 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2340 LLVM_DEBUG(
2341 dbgs() << "Generic instruction has unexpected implicit operands\n");
2342 return false;
2343 }
2344
2345 // Try to do some lowering before we start instruction selecting. These
2346 // lowerings are purely transformations on the input G_MIR and so selection
2347 // must continue after any modification of the instruction.
2348 if (preISelLower(I)) {
2349 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2350 }
2351
2352   // There may be patterns that the importer can't handle optimally but still
2353   // selects to a suboptimal sequence, so our custom C++ selection code never
2354   // gets a chance to work on them. Therefore, we have an early selection
2355   // attempt here to give priority to certain selection routines over the
2356   // imported ones.
2357 if (earlySelect(I))
2358 return true;
2359
2360 if (selectImpl(I, *CoverageInfo))
2361 return true;
2362
2363 LLT Ty =
2364 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2365
2366 switch (Opcode) {
2367 case TargetOpcode::G_SBFX:
2368 case TargetOpcode::G_UBFX: {
2369 static const unsigned OpcTable[2][2] = {
2370 {AArch64::UBFMWri, AArch64::UBFMXri},
2371 {AArch64::SBFMWri, AArch64::SBFMXri}};
2372 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2373 unsigned Size = Ty.getSizeInBits();
2374 unsigned Opc = OpcTable[IsSigned][Size == 64];
2375 auto Cst1 =
2376 getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
2377 assert(Cst1 && "Should have gotten a constant for src 1?");
2378 auto Cst2 =
2379 getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
2380 assert(Cst2 && "Should have gotten a constant for src 2?");
2381 auto LSB = Cst1->Value.getZExtValue();
2382 auto Width = Cst2->Value.getZExtValue();
2383 auto BitfieldInst =
2384 MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
2385 .addImm(LSB)
2386 .addImm(LSB + Width - 1);
2387 I.eraseFromParent();
2388 return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2389 }
2390 case TargetOpcode::G_BRCOND:
2391 return selectCompareBranch(I, MF, MRI);
2392
2393 case TargetOpcode::G_BRINDIRECT: {
2394 I.setDesc(TII.get(AArch64::BR));
2395 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2396 }
2397
2398 case TargetOpcode::G_BRJT:
2399 return selectBrJT(I, MRI);
2400
2401 case AArch64::G_ADD_LOW: {
2402     // This op may have been separated from its ADRP companion by the localizer
2403     // or some other code motion pass. Given that many CPUs will try to
2404     // macro-fuse these operations anyway, select this into a MOVaddr pseudo
2405 // which will later be expanded into an ADRP+ADD pair after scheduling.
2406 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2407 if (BaseMI->getOpcode() != AArch64::ADRP) {
2408 I.setDesc(TII.get(AArch64::ADDXri));
2409 I.addOperand(MachineOperand::CreateImm(0));
2410 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2411 }
2412 assert(TM.getCodeModel() == CodeModel::Small &&
2413 "Expected small code model");
2414 auto Op1 = BaseMI->getOperand(1);
2415 auto Op2 = I.getOperand(2);
2416 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2417 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2418 Op1.getTargetFlags())
2419 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2420 Op2.getTargetFlags());
2421 I.eraseFromParent();
2422 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2423 }
2424
2425 case TargetOpcode::G_BSWAP: {
2426 // Handle vector types for G_BSWAP directly.
2427 Register DstReg = I.getOperand(0).getReg();
2428 LLT DstTy = MRI.getType(DstReg);
2429
2430 // We should only get vector types here; everything else is handled by the
2431 // importer right now.
2432 if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
2433 LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
2434 return false;
2435 }
2436
2437 // Only handle 4 and 2 element vectors for now.
2438 // TODO: 16-bit elements.
2439 unsigned NumElts = DstTy.getNumElements();
2440 if (NumElts != 4 && NumElts != 2) {
2441 LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
2442 return false;
2443 }
2444
2445 // Choose the correct opcode for the supported types. Right now, that's
2446 // v2s32, v4s32, and v2s64.
2447 unsigned Opc = 0;
2448 unsigned EltSize = DstTy.getElementType().getSizeInBits();
2449 if (EltSize == 32)
2450 Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
2451 : AArch64::REV32v16i8;
2452 else if (EltSize == 64)
2453 Opc = AArch64::REV64v16i8;
2454
2455 // We should always get something by the time we get here...
2456 assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
2457
2458 I.setDesc(TII.get(Opc));
2459 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2460 }
2461
2462 case TargetOpcode::G_FCONSTANT:
2463 case TargetOpcode::G_CONSTANT: {
2464 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2465
2466 const LLT s8 = LLT::scalar(8);
2467 const LLT s16 = LLT::scalar(16);
2468 const LLT s32 = LLT::scalar(32);
2469 const LLT s64 = LLT::scalar(64);
2470 const LLT s128 = LLT::scalar(128);
2471 const LLT p0 = LLT::pointer(0, 64);
2472
2473 const Register DefReg = I.getOperand(0).getReg();
2474 const LLT DefTy = MRI.getType(DefReg);
2475 const unsigned DefSize = DefTy.getSizeInBits();
2476 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2477
2478 // FIXME: Redundant check, but even less readable when factored out.
2479 if (isFP) {
2480 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2481 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2482 << " constant, expected: " << s16 << " or " << s32
2483 << " or " << s64 << " or " << s128 << '\n');
2484 return false;
2485 }
2486
2487 if (RB.getID() != AArch64::FPRRegBankID) {
2488 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2489 << " constant on bank: " << RB
2490 << ", expected: FPR\n");
2491 return false;
2492 }
2493
2494       // The case when we have 0.0 is covered by tablegen. Reject it here so we
2495       // can be sure tablegen works correctly and isn't rescued by this code.
2496       // The exception is FP128 0.0, which tablegen does not cover; handle that
2497       // scenario here instead.
2498 if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2499 return false;
2500 } else {
2501 // s32 and s64 are covered by tablegen.
2502 if (Ty != p0 && Ty != s8 && Ty != s16) {
2503 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2504 << " constant, expected: " << s32 << ", " << s64
2505 << ", or " << p0 << '\n');
2506 return false;
2507 }
2508
2509 if (RB.getID() != AArch64::GPRRegBankID) {
2510 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2511 << " constant on bank: " << RB
2512 << ", expected: GPR\n");
2513 return false;
2514 }
2515 }
2516
2517 if (isFP) {
2518 const TargetRegisterClass &FPRRC = *getMinClassForRegBank(RB, DefSize);
2519 // For 16, 64, and 128b values, emit a constant pool load.
2520 switch (DefSize) {
2521 default:
2522 llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
2523 case 32:
2524 // For s32, use a cp load if we have optsize/minsize.
2525 if (!shouldOptForSize(&MF))
2526 break;
2527 LLVM_FALLTHROUGH;
2528 case 16:
2529 case 64:
2530 case 128: {
2531 auto *FPImm = I.getOperand(1).getFPImm();
2532 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2533 if (!LoadMI) {
2534 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2535 return false;
2536 }
2537 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2538 I.eraseFromParent();
2539 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2540 }
2541 }
2542
2543       // Either emit an FMOV, or emit a normal mov into a GPR plus a copy.
2544 assert(DefSize == 32 &&
2545 "Expected constant pool loads for all sizes other than 32!");
2546 const Register DefGPRReg =
2547 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2548 MachineOperand &RegOp = I.getOperand(0);
2549 RegOp.setReg(DefGPRReg);
2550 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2551 MIB.buildCopy({DefReg}, {DefGPRReg});
2552
2553 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2554 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2555 return false;
2556 }
2557
2558 MachineOperand &ImmOp = I.getOperand(1);
2559 // FIXME: Is going through int64_t always correct?
2560 ImmOp.ChangeToImmediate(
2561 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2562 } else if (I.getOperand(1).isCImm()) {
2563 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2564 I.getOperand(1).ChangeToImmediate(Val);
2565 } else if (I.getOperand(1).isImm()) {
2566 uint64_t Val = I.getOperand(1).getImm();
2567 I.getOperand(1).ChangeToImmediate(Val);
2568 }
2569
2570 const unsigned MovOpc =
2571 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2572 I.setDesc(TII.get(MovOpc));
2573 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2574 return true;
2575 }
2576 case TargetOpcode::G_EXTRACT: {
2577 Register DstReg = I.getOperand(0).getReg();
2578 Register SrcReg = I.getOperand(1).getReg();
2579 LLT SrcTy = MRI.getType(SrcReg);
2580 LLT DstTy = MRI.getType(DstReg);
2581 (void)DstTy;
2582 unsigned SrcSize = SrcTy.getSizeInBits();
2583
2584 if (SrcTy.getSizeInBits() > 64) {
2585 // This should be an extract of an s128, which is like a vector extract.
2586 if (SrcTy.getSizeInBits() != 128)
2587 return false;
2588 // Only support extracting 64 bits from an s128 at the moment.
2589 if (DstTy.getSizeInBits() != 64)
2590 return false;
2591
2592 unsigned Offset = I.getOperand(2).getImm();
2593 if (Offset % 64 != 0)
2594 return false;
2595
2596       // Check that we always have the right regbank.
2597 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2598 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2599 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2600
2601 if (SrcRB.getID() == AArch64::GPRRegBankID) {
2602 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2603 .addUse(SrcReg, 0, Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2604 I.eraseFromParent();
2605 return true;
2606 }
2607
2608 // Emit the same code as a vector extract.
2609 // Offset must be a multiple of 64.
2610 unsigned LaneIdx = Offset / 64;
2611 MachineInstr *Extract = emitExtractVectorElt(
2612 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2613 if (!Extract)
2614 return false;
2615 I.eraseFromParent();
2616 return true;
2617 }
2618
2619 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2620 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2621 Ty.getSizeInBits() - 1);
2622
2623 if (SrcSize < 64) {
2624 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2625 "unexpected G_EXTRACT types");
2626 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2627 }
2628
2629 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2630 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2631 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2632 .addReg(DstReg, 0, AArch64::sub_32);
2633 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2634 AArch64::GPR32RegClass, MRI);
2635 I.getOperand(0).setReg(DstReg);
2636
2637 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2638 }
2639
2640 case TargetOpcode::G_INSERT: {
2641 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2642 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2643 unsigned DstSize = DstTy.getSizeInBits();
2644 // Larger inserts are vectors, same-size ones should be something else by
2645 // now (split up or turned into COPYs).
2646 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2647 return false;
2648
2649 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2650 unsigned LSB = I.getOperand(3).getImm();
2651 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2652 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2653 MachineInstrBuilder(MF, I).addImm(Width - 1);
2654
2655 if (DstSize < 64) {
2656 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2657 "unexpected G_INSERT types");
2658 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2659 }
2660
2661 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2662 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2663 TII.get(AArch64::SUBREG_TO_REG))
2664 .addDef(SrcReg)
2665 .addImm(0)
2666 .addUse(I.getOperand(2).getReg())
2667 .addImm(AArch64::sub_32);
2668 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2669 AArch64::GPR32RegClass, MRI);
2670 I.getOperand(2).setReg(SrcReg);
2671
2672 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2673 }
2674 case TargetOpcode::G_FRAME_INDEX: {
2675 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2676 if (Ty != LLT::pointer(0, 64)) {
2677 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2678 << ", expected: " << LLT::pointer(0, 64) << '\n');
2679 return false;
2680 }
2681 I.setDesc(TII.get(AArch64::ADDXri));
2682
2683 // MOs for a #0 shifted immediate.
2684 I.addOperand(MachineOperand::CreateImm(0));
2685 I.addOperand(MachineOperand::CreateImm(0));
2686
2687 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2688 }
2689
2690 case TargetOpcode::G_GLOBAL_VALUE: {
2691 auto GV = I.getOperand(1).getGlobal();
2692 if (GV->isThreadLocal())
2693 return selectTLSGlobalValue(I, MRI);
2694
2695 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
2696 if (OpFlags & AArch64II::MO_GOT) {
2697 I.setDesc(TII.get(AArch64::LOADgot));
2698 I.getOperand(1).setTargetFlags(OpFlags);
2699 } else if (TM.getCodeModel() == CodeModel::Large) {
2700 // Materialize the global using movz/movk instructions.
2701 materializeLargeCMVal(I, GV, OpFlags);
2702 I.eraseFromParent();
2703 return true;
2704 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2705 I.setDesc(TII.get(AArch64::ADR));
2706 I.getOperand(1).setTargetFlags(OpFlags);
2707 } else {
2708 I.setDesc(TII.get(AArch64::MOVaddr));
2709 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2710 MachineInstrBuilder MIB(MF, I);
2711 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2712 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2713 }
2714 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2715 }
2716
2717 case TargetOpcode::G_ZEXTLOAD:
2718 case TargetOpcode::G_LOAD:
2719 case TargetOpcode::G_STORE: {
2720 GLoadStore &LdSt = cast<GLoadStore>(I);
2721 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2722 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
2723
2724 if (PtrTy != LLT::pointer(0, 64)) {
2725 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
2726 << ", expected: " << LLT::pointer(0, 64) << '\n');
2727 return false;
2728 }
2729
2730 uint64_t MemSizeInBytes = LdSt.getMemSize();
2731 unsigned MemSizeInBits = LdSt.getMemSizeInBits();
2732 AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
2733
2734 // Need special instructions for atomics that affect ordering.
2735 if (Order != AtomicOrdering::NotAtomic &&
2736 Order != AtomicOrdering::Unordered &&
2737 Order != AtomicOrdering::Monotonic) {
2738 assert(!isa<GZExtLoad>(LdSt));
2739 if (MemSizeInBytes > 64)
2740 return false;
2741
2742 if (isa<GLoad>(LdSt)) {
2743 static unsigned Opcodes[] = {AArch64::LDARB, AArch64::LDARH,
2744 AArch64::LDARW, AArch64::LDARX};
2745 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2746 } else {
2747 static unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2748 AArch64::STLRW, AArch64::STLRX};
2749 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2750 }
2751 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2752 return true;
2753 }
2754
2755 #ifndef NDEBUG
2756 const Register PtrReg = LdSt.getPointerReg();
2757 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2758 // Sanity-check the pointer register.
2759 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2760 "Load/Store pointer operand isn't a GPR");
2761 assert(MRI.getType(PtrReg).isPointer() &&
2762 "Load/Store pointer operand isn't a pointer");
2763 #endif
2764
2765 const Register ValReg = LdSt.getReg(0);
2766 const LLT ValTy = MRI.getType(ValReg);
2767 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2768
2769 // The code below doesn't support truncating stores, so we need to split it
2770 // again.
2771 if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2772 unsigned SubReg;
2773 LLT MemTy = LdSt.getMMO().getMemoryType();
2774 auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI);
2775 if (!getSubRegForClass(RC, TRI, SubReg))
2776 return false;
2777
2778 // Generate a subreg copy.
2779 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
2780 .addReg(ValReg, 0, SubReg)
2781 .getReg(0);
2782 RBI.constrainGenericRegister(Copy, *RC, MRI);
2783 LdSt.getOperand(0).setReg(Copy);
2784 } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2785       // If this is an any-extending load from the FPR bank, split it into a
2786       // regular load + extend.
2787 if (RB.getID() == AArch64::FPRRegBankID) {
2788 unsigned SubReg;
2789 LLT MemTy = LdSt.getMMO().getMemoryType();
2790 auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI);
2791 if (!getSubRegForClass(RC, TRI, SubReg))
2792 return false;
2793 Register OldDst = LdSt.getReg(0);
2794 Register NewDst =
2795 MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
2796 LdSt.getOperand(0).setReg(NewDst);
2797 MRI.setRegBank(NewDst, RB);
2798 // Generate a SUBREG_TO_REG to extend it.
2799 MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
2800 MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
2801 .addImm(0)
2802 .addUse(NewDst)
2803 .addImm(SubReg);
2804 auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB, RBI);
2805 RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
2806 MIB.setInstr(LdSt);
2807 }
2808 }
2809
2810 // Helper lambda for partially selecting I. Either returns the original
2811 // instruction with an updated opcode, or a new instruction.
2812 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
2813 bool IsStore = isa<GStore>(I);
2814 const unsigned NewOpc =
2815 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
2816 if (NewOpc == I.getOpcode())
2817 return nullptr;
2818 // Check if we can fold anything into the addressing mode.
2819 auto AddrModeFns =
2820 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
2821 if (!AddrModeFns) {
2822 // Can't fold anything. Use the original instruction.
2823 I.setDesc(TII.get(NewOpc));
2824 I.addOperand(MachineOperand::CreateImm(0));
2825 return &I;
2826 }
2827
2828 // Folded something. Create a new instruction and return it.
2829 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
2830 Register CurValReg = I.getOperand(0).getReg();
2831 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
2832 NewInst.cloneMemRefs(I);
2833 for (auto &Fn : *AddrModeFns)
2834 Fn(NewInst);
2835 I.eraseFromParent();
2836 return &*NewInst;
2837 };
2838
2839 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
2840 if (!LoadStore)
2841 return false;
2842
2843 // If we're storing a 0, use WZR/XZR.
2844 if (Opcode == TargetOpcode::G_STORE) {
2845 auto CVal = getIConstantVRegValWithLookThrough(
2846 LoadStore->getOperand(0).getReg(), MRI);
2847 if (CVal && CVal->Value == 0) {
2848 switch (LoadStore->getOpcode()) {
2849 case AArch64::STRWui:
2850 case AArch64::STRHHui:
2851 case AArch64::STRBBui:
2852 LoadStore->getOperand(0).setReg(AArch64::WZR);
2853 break;
2854 case AArch64::STRXui:
2855 LoadStore->getOperand(0).setReg(AArch64::XZR);
2856 break;
2857 }
2858 }
2859 }
2860
2861 if (IsZExtLoad) {
2862 // The zextload from a smaller type to i32 should be handled by the
2863 // importer.
2864 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
2865 return false;
2866 // If we have a ZEXTLOAD then change the load's type to be a narrower reg
2867 // and zero_extend with SUBREG_TO_REG.
2868 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2869 Register DstReg = LoadStore->getOperand(0).getReg();
2870 LoadStore->getOperand(0).setReg(LdReg);
2871
2872 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
2873 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
2874 .addImm(0)
2875 .addUse(LdReg)
2876 .addImm(AArch64::sub_32);
2877 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2878 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
2879 MRI);
2880 }
2881 return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2882 }
2883
2884 case TargetOpcode::G_SMULH:
2885 case TargetOpcode::G_UMULH: {
2886 // Reject the various things we don't support yet.
2887 if (unsupportedBinOp(I, RBI, MRI, TRI))
2888 return false;
2889
2890 const Register DefReg = I.getOperand(0).getReg();
2891 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2892
2893 if (RB.getID() != AArch64::GPRRegBankID) {
2894 LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
2895 return false;
2896 }
2897
2898 if (Ty != LLT::scalar(64)) {
2899 LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
2900 << ", expected: " << LLT::scalar(64) << '\n');
2901 return false;
2902 }
2903
2904 unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
2905 : AArch64::UMULHrr;
2906 I.setDesc(TII.get(NewOpc));
2907
2908 // Now that we selected an opcode, we need to constrain the register
2909 // operands to use appropriate classes.
2910 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2911 }
2912 case TargetOpcode::G_LSHR:
2913 case TargetOpcode::G_ASHR:
2914 if (MRI.getType(I.getOperand(0).getReg()).isVector())
2915 return selectVectorAshrLshr(I, MRI);
2916 LLVM_FALLTHROUGH;
2917 case TargetOpcode::G_SHL:
2918 if (Opcode == TargetOpcode::G_SHL &&
2919 MRI.getType(I.getOperand(0).getReg()).isVector())
2920 return selectVectorSHL(I, MRI);
2921 LLVM_FALLTHROUGH;
2922 case TargetOpcode::G_FADD:
2923 case TargetOpcode::G_FSUB:
2924 case TargetOpcode::G_FMUL:
2925 case TargetOpcode::G_FDIV:
2926 case TargetOpcode::G_OR: {
2927 // Reject the various things we don't support yet.
2928 if (unsupportedBinOp(I, RBI, MRI, TRI))
2929 return false;
2930
2931 const unsigned OpSize = Ty.getSizeInBits();
2932
2933 const Register DefReg = I.getOperand(0).getReg();
2934 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2935
2936 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
2937 if (NewOpc == I.getOpcode())
2938 return false;
2939
2940 I.setDesc(TII.get(NewOpc));
    // FIXME: Should the type always be reset in setDesc?
2942
2943 // Now that we selected an opcode, we need to constrain the register
2944 // operands to use appropriate classes.
2945 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2946 }
2947
2948 case TargetOpcode::G_PTR_ADD: {
2949 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
2950 I.eraseFromParent();
2951 return true;
2952 }
2953 case TargetOpcode::G_SADDO:
2954 case TargetOpcode::G_UADDO:
2955 case TargetOpcode::G_SSUBO:
2956 case TargetOpcode::G_USUBO: {
2957 // Emit the operation and get the correct condition code.
2958 auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
2959 I.getOperand(2), I.getOperand(3), MIB);
2960
    // Now, put the overflow result in the register given by the carry-out
    // operand (operand 1) of the overflow op. CSINC increments the result
    // when the predicate is false, so to get the increment when it's true,
    // we use the inverted condition code; e.g. for G_UADDO we increment
    // when the carry flag is set.
2965 Register ZReg = AArch64::WZR;
2966 auto CsetMI = MIB.buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
2967 {ZReg, ZReg})
2968 .addImm(getInvertedCondCode(OpAndCC.second));
2969 constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
2970 I.eraseFromParent();
2971 return true;
2972 }
2973
2974 case TargetOpcode::G_PTRMASK: {
2975 Register MaskReg = I.getOperand(2).getReg();
2976 Optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
2977 // TODO: Implement arbitrary cases
2978 if (!MaskVal || !isShiftedMask_64(*MaskVal))
2979 return false;
2980
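    // E.g. clearing the low four bits for 16-byte alignment uses the mask
    // 0xfffffffffffffff0, which is a contiguous run of ones and so can be
    // encoded directly as the logical immediate of an ANDXri.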
2981 uint64_t Mask = *MaskVal;
2982 I.setDesc(TII.get(AArch64::ANDXri));
2983 I.getOperand(2).ChangeToImmediate(
2984 AArch64_AM::encodeLogicalImmediate(Mask, 64));
2985
2986 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2987 }
2988 case TargetOpcode::G_PTRTOINT:
2989 case TargetOpcode::G_TRUNC: {
2990 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2991 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
2992
2993 const Register DstReg = I.getOperand(0).getReg();
2994 const Register SrcReg = I.getOperand(1).getReg();
2995
2996 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2997 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2998
2999 if (DstRB.getID() != SrcRB.getID()) {
3000 LLVM_DEBUG(
3001 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
3002 return false;
3003 }
3004
3005 if (DstRB.getID() == AArch64::GPRRegBankID) {
3006 const TargetRegisterClass *DstRC =
3007 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
3008 if (!DstRC)
3009 return false;
3010
3011 const TargetRegisterClass *SrcRC =
3012 getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
3013 if (!SrcRC)
3014 return false;
3015
3016 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
3017 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
3018 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
3019 return false;
3020 }
3021
3022 if (DstRC == SrcRC) {
3023 // Nothing to be done
3024 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
3025 SrcTy == LLT::scalar(64)) {
3026 llvm_unreachable("TableGen can import this case");
3027 return false;
3028 } else if (DstRC == &AArch64::GPR32RegClass &&
3029 SrcRC == &AArch64::GPR64RegClass) {
3030 I.getOperand(1).setSubReg(AArch64::sub_32);
3031 } else {
3032 LLVM_DEBUG(
3033 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3034 return false;
3035 }
3036
3037 I.setDesc(TII.get(TargetOpcode::COPY));
3038 return true;
3039 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3040 if (DstTy == LLT::fixed_vector(4, 16) &&
3041 SrcTy == LLT::fixed_vector(4, 32)) {
3042 I.setDesc(TII.get(AArch64::XTNv4i16));
3043 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3044 return true;
3045 }
3046
3047 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3048 MachineInstr *Extract = emitExtractVectorElt(
3049 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
3050 if (!Extract)
3051 return false;
3052 I.eraseFromParent();
3053 return true;
3054 }
3055
3056 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3057 if (Opcode == TargetOpcode::G_PTRTOINT) {
3058 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
3059 I.setDesc(TII.get(TargetOpcode::COPY));
3060 return selectCopy(I, TII, MRI, TRI, RBI);
3061 }
3062 }
3063
3064 return false;
3065 }
3066
3067 case TargetOpcode::G_ANYEXT: {
3068 if (selectUSMovFromExtend(I, MRI))
3069 return true;
3070
3071 const Register DstReg = I.getOperand(0).getReg();
3072 const Register SrcReg = I.getOperand(1).getReg();
3073
3074 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
3075 if (RBDst.getID() != AArch64::GPRRegBankID) {
3076 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3077 << ", expected: GPR\n");
3078 return false;
3079 }
3080
3081 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
3082 if (RBSrc.getID() != AArch64::GPRRegBankID) {
3083 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3084 << ", expected: GPR\n");
3085 return false;
3086 }
3087
3088 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
3089
3090 if (DstSize == 0) {
3091 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3092 return false;
3093 }
3094
3095 if (DstSize != 64 && DstSize > 32) {
3096 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3097 << ", expected: 32 or 64\n");
3098 return false;
3099 }
    // At this point G_ANYEXT is just like a plain COPY, but we need
    // to explicitly form the 64-bit value when extending to 64 bits.
3102 if (DstSize > 32) {
3103 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3104 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
3105 .addDef(ExtSrc)
3106 .addImm(0)
3107 .addUse(SrcReg)
3108 .addImm(AArch64::sub_32);
3109 I.getOperand(1).setReg(ExtSrc);
3110 }
3111 return selectCopy(I, TII, MRI, TRI, RBI);
3112 }
3113
3114 case TargetOpcode::G_ZEXT:
3115 case TargetOpcode::G_SEXT_INREG:
3116 case TargetOpcode::G_SEXT: {
3117 if (selectUSMovFromExtend(I, MRI))
3118 return true;
3119
3120 unsigned Opcode = I.getOpcode();
3121 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3122 const Register DefReg = I.getOperand(0).getReg();
3123 Register SrcReg = I.getOperand(1).getReg();
3124 const LLT DstTy = MRI.getType(DefReg);
3125 const LLT SrcTy = MRI.getType(SrcReg);
3126 unsigned DstSize = DstTy.getSizeInBits();
3127 unsigned SrcSize = SrcTy.getSizeInBits();
3128
    // SEXT_INREG has the same src reg size as the dst; the size of the
    // value to be extended is encoded in the imm.
3131 if (Opcode == TargetOpcode::G_SEXT_INREG)
3132 SrcSize = I.getOperand(2).getImm();
3133
3134 if (DstTy.isVector())
3135 return false; // Should be handled by imported patterns.
3136
3137 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3138 AArch64::GPRRegBankID &&
3139 "Unexpected ext regbank");
3140
3141 MachineInstr *ExtI;
3142
    // First check if we're extending the result of a load with a dest type
    // smaller than 32 bits; if so, this zext is redundant. GPR32 is the
    // smallest GPR register class on AArch64, and all narrower loads
    // automatically zero-extend the upper bits. E.g.
3147 // %v(s8) = G_LOAD %p, :: (load 1)
3148 // %v2(s32) = G_ZEXT %v(s8)
3149 if (!IsSigned) {
3150 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
3151 bool IsGPR =
3152 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3153 if (LoadMI && IsGPR) {
3154 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3155 unsigned BytesLoaded = MemOp->getSize();
3156 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3157 return selectCopy(I, TII, MRI, TRI, RBI);
3158 }
3159
3160 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3161 // + SUBREG_TO_REG.
3162 //
3163 // If we are zero extending from 32 bits to 64 bits, it's possible that
3164 // the instruction implicitly does the zero extend for us. In that case,
3165 // we only need the SUBREG_TO_REG.
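      // I.e. assuming %src already zeroes bits 63:32 (isDef32), we emit just:
      //   %dst:gpr64 = SUBREG_TO_REG 0, %src, %subreg.sub_32
      // and otherwise first zero the high half with an ORRWrs of %src against
      // WZR into a fresh vreg (vreg names here are illustrative).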
3166 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3167 // Unlike with the G_LOAD case, we don't want to look through copies
3168 // here. (See isDef32.)
3169 MachineInstr *Def = MRI.getVRegDef(SrcReg);
3170 Register SubregToRegSrc = SrcReg;
3171
3172 // Does the instruction implicitly zero extend?
3173 if (!Def || !isDef32(*Def)) {
3174 // No. Zero out using an OR.
3175 Register OrDst = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3176 const Register ZReg = AArch64::WZR;
3177 MIB.buildInstr(AArch64::ORRWrs, {OrDst}, {ZReg, SrcReg}).addImm(0);
3178 SubregToRegSrc = OrDst;
3179 }
3180
3181 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3182 .addImm(0)
3183 .addUse(SubregToRegSrc)
3184 .addImm(AArch64::sub_32);
3185
3186 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3187 MRI)) {
3188 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3189 return false;
3190 }
3191
3192 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3193 MRI)) {
3194 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3195 return false;
3196 }
3197
3198 I.eraseFromParent();
3199 return true;
3200 }
3201 }
3202
3203 if (DstSize == 64) {
3204 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3205 // FIXME: Can we avoid manually doing this?
3206 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3207 MRI)) {
3208 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3209 << " operand\n");
3210 return false;
3211 }
3212 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3213 {&AArch64::GPR64RegClass}, {})
3214 .addImm(0)
3215 .addUse(SrcReg)
3216 .addImm(AArch64::sub_32)
3217 .getReg(0);
3218 }
3219
3220 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3221 {DefReg}, {SrcReg})
3222 .addImm(0)
3223 .addImm(SrcSize - 1);
3224 } else if (DstSize <= 32) {
3225 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3226 {DefReg}, {SrcReg})
3227 .addImm(0)
3228 .addImm(SrcSize - 1);
3229 } else {
3230 return false;
3231 }
3232
3233 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
3234 I.eraseFromParent();
3235 return true;
3236 }
3237
3238 case TargetOpcode::G_SITOFP:
3239 case TargetOpcode::G_UITOFP:
3240 case TargetOpcode::G_FPTOSI:
3241 case TargetOpcode::G_FPTOUI: {
3242 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
3243 SrcTy = MRI.getType(I.getOperand(1).getReg());
3244 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
3245 if (NewOpc == Opcode)
3246 return false;
3247
3248 I.setDesc(TII.get(NewOpc));
3249 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3250
3251 return true;
3252 }
3253
3254 case TargetOpcode::G_FREEZE:
3255 return selectCopy(I, TII, MRI, TRI, RBI);
3256
3257 case TargetOpcode::G_INTTOPTR:
3258 // The importer is currently unable to import pointer types since they
3259 // didn't exist in SelectionDAG.
3260 return selectCopy(I, TII, MRI, TRI, RBI);
3261
3262 case TargetOpcode::G_BITCAST:
3263 // Imported SelectionDAG rules can handle every bitcast except those that
3264 // bitcast from a type to the same type. Ideally, these shouldn't occur
3265 // but we might not run an optimizer that deletes them. The other exception
3266 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3267 // of them.
3268 return selectCopy(I, TII, MRI, TRI, RBI);
3269
3270 case TargetOpcode::G_SELECT: {
    const LLT CondTy = MRI.getType(I.getOperand(1).getReg());
    if (CondTy != LLT::scalar(1)) {
      LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << CondTy
                        << ", expected: " << LLT::scalar(1) << '\n');
      return false;
3275 }
3276
3277 const Register CondReg = I.getOperand(1).getReg();
3278 const Register TReg = I.getOperand(2).getReg();
3279 const Register FReg = I.getOperand(3).getReg();
3280
3281 if (tryOptSelect(I))
3282 return true;
3283
    // Make sure to use an unused vreg instead of wzr, so that the peephole
    // optimizer is still able to optimize these instructions later.
3286 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3287 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3288 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3289 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3290 if (!emitSelect(I.getOperand(0).getReg(), TReg, FReg, AArch64CC::NE, MIB))
3291 return false;
3292 I.eraseFromParent();
3293 return true;
3294 }
3295 case TargetOpcode::G_ICMP: {
3296 if (Ty.isVector())
3297 return selectVectorICmp(I, MRI);
3298
3299 if (Ty != LLT::scalar(32)) {
3300 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3301 << ", expected: " << LLT::scalar(32) << '\n');
3302 return false;
3303 }
3304
3305 auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3306 emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
3307 MIB);
3308 emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIB);
3309 I.eraseFromParent();
3310 return true;
3311 }
3312
3313 case TargetOpcode::G_FCMP: {
3314 CmpInst::Predicate Pred =
3315 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3316 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
3317 Pred) ||
3318 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
3319 return false;
3320 I.eraseFromParent();
3321 return true;
3322 }
3323 case TargetOpcode::G_VASTART:
3324 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3325 : selectVaStartAAPCS(I, MF, MRI);
3326 case TargetOpcode::G_INTRINSIC:
3327 return selectIntrinsic(I, MRI);
3328 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3329 return selectIntrinsicWithSideEffects(I, MRI);
3330 case TargetOpcode::G_IMPLICIT_DEF: {
3331 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3332 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3333 const Register DstReg = I.getOperand(0).getReg();
3334 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3335 const TargetRegisterClass *DstRC =
3336 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
3337 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3338 return true;
3339 }
3340 case TargetOpcode::G_BLOCK_ADDR: {
3341 if (TM.getCodeModel() == CodeModel::Large) {
3342 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3343 I.eraseFromParent();
3344 return true;
3345 } else {
3346 I.setDesc(TII.get(AArch64::MOVaddrBA));
3347 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3348 I.getOperand(0).getReg())
3349 .addBlockAddress(I.getOperand(1).getBlockAddress(),
3350 /* Offset */ 0, AArch64II::MO_PAGE)
3351 .addBlockAddress(
3352 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3353 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3354 I.eraseFromParent();
3355 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3356 }
3357 }
3358 case AArch64::G_DUP: {
    // When the scalar of G_DUP is an s8/s16 GPR, it can't be selected by the
    // imported patterns. Do it manually here. Avoiding the s16 GPR case at
    // RegBankSelect is difficult: adding an anyextend to work around it could
    // pessimize the FPR case. Manual selection is the most robust solution
    // for now.
3364 if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3365 AArch64::GPRRegBankID)
3366 return false; // We expect the fpr regbank case to be imported.
3367 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3368 if (VecTy == LLT::fixed_vector(8, 8))
3369 I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3370 else if (VecTy == LLT::fixed_vector(16, 8))
3371 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3372 else if (VecTy == LLT::fixed_vector(4, 16))
3373 I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3374 else if (VecTy == LLT::fixed_vector(8, 16))
3375 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3376 else
3377 return false;
3378 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3379 }
3380 case TargetOpcode::G_INTRINSIC_TRUNC:
3381 return selectIntrinsicTrunc(I, MRI);
3382 case TargetOpcode::G_INTRINSIC_ROUND:
3383 return selectIntrinsicRound(I, MRI);
3384 case TargetOpcode::G_BUILD_VECTOR:
3385 return selectBuildVector(I, MRI);
3386 case TargetOpcode::G_MERGE_VALUES:
3387 return selectMergeValues(I, MRI);
3388 case TargetOpcode::G_UNMERGE_VALUES:
3389 return selectUnmergeValues(I, MRI);
3390 case TargetOpcode::G_SHUFFLE_VECTOR:
3391 return selectShuffleVector(I, MRI);
3392 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3393 return selectExtractElt(I, MRI);
3394 case TargetOpcode::G_INSERT_VECTOR_ELT:
3395 return selectInsertElt(I, MRI);
3396 case TargetOpcode::G_CONCAT_VECTORS:
3397 return selectConcatVectors(I, MRI);
3398 case TargetOpcode::G_JUMP_TABLE:
3399 return selectJumpTable(I, MRI);
3400 case TargetOpcode::G_VECREDUCE_FADD:
3401 case TargetOpcode::G_VECREDUCE_ADD:
3402 return selectReduction(I, MRI);
3403 }
3404
3405 return false;
3406 }
3407
bool AArch64InstructionSelector::selectReduction(MachineInstr &I,
                                                 MachineRegisterInfo &MRI) {
3410 Register VecReg = I.getOperand(1).getReg();
3411 LLT VecTy = MRI.getType(VecReg);
3412 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
3413 // For <2 x i32> ADDPv2i32 generates an FPR64 value, so we need to emit
3414 // a subregister copy afterwards.
3415 if (VecTy == LLT::fixed_vector(2, 32)) {
3416 Register DstReg = I.getOperand(0).getReg();
3417 auto AddP = MIB.buildInstr(AArch64::ADDPv2i32, {&AArch64::FPR64RegClass},
3418 {VecReg, VecReg});
3419 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3420 .addReg(AddP.getReg(0), 0, AArch64::ssub)
3421 .getReg(0);
3422 RBI.constrainGenericRegister(Copy, AArch64::FPR32RegClass, MRI);
3423 I.eraseFromParent();
3424 return constrainSelectedInstRegOperands(*AddP, TII, TRI, RBI);
3425 }
3426
3427 unsigned Opc = 0;
3428 if (VecTy == LLT::fixed_vector(16, 8))
3429 Opc = AArch64::ADDVv16i8v;
3430 else if (VecTy == LLT::fixed_vector(8, 16))
3431 Opc = AArch64::ADDVv8i16v;
3432 else if (VecTy == LLT::fixed_vector(4, 32))
3433 Opc = AArch64::ADDVv4i32v;
3434 else if (VecTy == LLT::fixed_vector(2, 64))
3435 Opc = AArch64::ADDPv2i64p;
3436 else {
      LLVM_DEBUG(dbgs() << "Unhandled type for add reduction\n");
3438 return false;
3439 }
3440 I.setDesc(TII.get(Opc));
3441 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3442 }
3443
3444 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) {
3445 unsigned Opc = 0;
3446 if (VecTy == LLT::fixed_vector(2, 32))
3447 Opc = AArch64::FADDPv2i32p;
3448 else if (VecTy == LLT::fixed_vector(2, 64))
3449 Opc = AArch64::FADDPv2i64p;
3450 else {
      LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction\n");
3452 return false;
3453 }
3454 I.setDesc(TII.get(Opc));
3455 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3456 }
3457 return false;
3458 }
3459
bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
                                            MachineRegisterInfo &MRI) {
3462 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3463 Register JTAddr = I.getOperand(0).getReg();
3464 unsigned JTI = I.getOperand(1).getIndex();
3465 Register Index = I.getOperand(2).getReg();
3466
3467 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3468 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3469
3470 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
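  // JumpTableDest32 computes the branch target as the table address plus the
  // sign-extended 32-bit entry selected by Index, using ScratchReg for the
  // loaded entry; it is expanded to the concrete sequence after selection.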
3471 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3472 {TargetReg, ScratchReg}, {JTAddr, Index})
3473 .addJumpTableIndex(JTI);
3474 // Build the indirect branch.
3475 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3476 I.eraseFromParent();
3477 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3478 }
3479
bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
                                                 MachineRegisterInfo &MRI) {
3482 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3483 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3484
3485 Register DstReg = I.getOperand(0).getReg();
3486 unsigned JTI = I.getOperand(1).getIndex();
3487 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
3488 auto MovMI =
3489 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3490 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3491 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3492 I.eraseFromParent();
3493 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3494 }
3495
bool AArch64InstructionSelector::selectTLSGlobalValue(
    MachineInstr &I, MachineRegisterInfo &MRI) {
3498 if (!STI.isTargetMachO())
3499 return false;
3500 MachineFunction &MF = *I.getParent()->getParent();
3501 MF.getFrameInfo().setAdjustsStack(true);
3502
3503 const auto &GlobalOp = I.getOperand(1);
3504 assert(GlobalOp.getOffset() == 0 &&
3505 "Shouldn't have an offset on TLS globals!");
3506 const GlobalValue &GV = *GlobalOp.getGlobal();
3507
3508 auto LoadGOT =
3509 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3510 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3511
3512 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3513 {LoadGOT.getReg(0)})
3514 .addImm(0);
3515
3516 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3517 // TLS calls preserve all registers except those that absolutely must be
3518 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3519 // silly).
3520 MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
3521 .addUse(AArch64::X0, RegState::Implicit)
3522 .addDef(AArch64::X0, RegState::Implicit)
3523 .addRegMask(TRI.getTLSCallPreservedMask());
3524
3525 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3526 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3527 MRI);
3528 I.eraseFromParent();
3529 return true;
3530 }
3531
bool AArch64InstructionSelector::selectIntrinsicTrunc(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
3534 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3535
3536 // Select the correct opcode.
3537 unsigned Opc = 0;
3538 if (!SrcTy.isVector()) {
3539 switch (SrcTy.getSizeInBits()) {
    default:
      break; // Unsupported size; leave Opc == 0 so we bail out below.
3541 case 16:
3542 Opc = AArch64::FRINTZHr;
3543 break;
3544 case 32:
3545 Opc = AArch64::FRINTZSr;
3546 break;
3547 case 64:
3548 Opc = AArch64::FRINTZDr;
3549 break;
3550 }
3551 } else {
3552 unsigned NumElts = SrcTy.getNumElements();
3553 switch (SrcTy.getElementType().getSizeInBits()) {
3554 default:
3555 break;
3556 case 16:
3557 if (NumElts == 4)
3558 Opc = AArch64::FRINTZv4f16;
3559 else if (NumElts == 8)
3560 Opc = AArch64::FRINTZv8f16;
3561 break;
3562 case 32:
3563 if (NumElts == 2)
3564 Opc = AArch64::FRINTZv2f32;
3565 else if (NumElts == 4)
3566 Opc = AArch64::FRINTZv4f32;
3567 break;
3568 case 64:
3569 if (NumElts == 2)
3570 Opc = AArch64::FRINTZv2f64;
3571 break;
3572 }
3573 }
3574
3575 if (!Opc) {
3576 // Didn't get an opcode above, bail.
3577 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
3578 return false;
3579 }
3580
3581 // Legalization would have set us up perfectly for this; we just need to
3582 // set the opcode and move on.
3583 I.setDesc(TII.get(Opc));
3584 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3585 }
3586
bool AArch64InstructionSelector::selectIntrinsicRound(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
3589 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3590
3591 // Select the correct opcode.
3592 unsigned Opc = 0;
3593 if (!SrcTy.isVector()) {
3594 switch (SrcTy.getSizeInBits()) {
    default:
      break; // Unsupported size; leave Opc == 0 so we bail out below.
3596 case 16:
3597 Opc = AArch64::FRINTAHr;
3598 break;
3599 case 32:
3600 Opc = AArch64::FRINTASr;
3601 break;
3602 case 64:
3603 Opc = AArch64::FRINTADr;
3604 break;
3605 }
3606 } else {
3607 unsigned NumElts = SrcTy.getNumElements();
3608 switch (SrcTy.getElementType().getSizeInBits()) {
3609 default:
3610 break;
3611 case 16:
3612 if (NumElts == 4)
3613 Opc = AArch64::FRINTAv4f16;
3614 else if (NumElts == 8)
3615 Opc = AArch64::FRINTAv8f16;
3616 break;
3617 case 32:
3618 if (NumElts == 2)
3619 Opc = AArch64::FRINTAv2f32;
3620 else if (NumElts == 4)
3621 Opc = AArch64::FRINTAv4f32;
3622 break;
3623 case 64:
3624 if (NumElts == 2)
3625 Opc = AArch64::FRINTAv2f64;
3626 break;
3627 }
3628 }
3629
3630 if (!Opc) {
3631 // Didn't get an opcode above, bail.
3632 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
3633 return false;
3634 }
3635
3636 // Legalization would have set us up perfectly for this; we just need to
3637 // set the opcode and move on.
3638 I.setDesc(TII.get(Opc));
3639 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3640 }
3641
bool AArch64InstructionSelector::selectVectorICmp(
    MachineInstr &I, MachineRegisterInfo &MRI) {
3644 Register DstReg = I.getOperand(0).getReg();
3645 LLT DstTy = MRI.getType(DstReg);
3646 Register SrcReg = I.getOperand(2).getReg();
3647 Register Src2Reg = I.getOperand(3).getReg();
3648 LLT SrcTy = MRI.getType(SrcReg);
3649
3650 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
3651 unsigned NumElts = DstTy.getNumElements();
3652
3653 // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
3654 // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
3655 // Third index is cc opcode:
3656 // 0 == eq
3657 // 1 == ugt
3658 // 2 == uge
3659 // 3 == ult
3660 // 4 == ule
3661 // 5 == sgt
3662 // 6 == sge
3663 // 7 == slt
3664 // 8 == sle
3665 // ne is done by negating 'eq' result.
3666
3667 // This table below assumes that for some comparisons the operands will be
3668 // commuted.
3669 // ult op == commute + ugt op
3670 // ule op == commute + uge op
3671 // slt op == commute + sgt op
3672 // sle op == commute + sge op
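  // E.g. (icmp ult %a, %b) is selected as CMHI %b, %a, i.e. the same opcode
  // as ugt with the operands swapped.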
3673 unsigned PredIdx = 0;
3674 bool SwapOperands = false;
3675 CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
3676 switch (Pred) {
3677 case CmpInst::ICMP_NE:
3678 case CmpInst::ICMP_EQ:
3679 PredIdx = 0;
3680 break;
3681 case CmpInst::ICMP_UGT:
3682 PredIdx = 1;
3683 break;
3684 case CmpInst::ICMP_UGE:
3685 PredIdx = 2;
3686 break;
3687 case CmpInst::ICMP_ULT:
3688 PredIdx = 3;
3689 SwapOperands = true;
3690 break;
3691 case CmpInst::ICMP_ULE:
3692 PredIdx = 4;
3693 SwapOperands = true;
3694 break;
3695 case CmpInst::ICMP_SGT:
3696 PredIdx = 5;
3697 break;
3698 case CmpInst::ICMP_SGE:
3699 PredIdx = 6;
3700 break;
3701 case CmpInst::ICMP_SLT:
3702 PredIdx = 7;
3703 SwapOperands = true;
3704 break;
3705 case CmpInst::ICMP_SLE:
3706 PredIdx = 8;
3707 SwapOperands = true;
3708 break;
3709 default:
3710 llvm_unreachable("Unhandled icmp predicate");
3711 return false;
3712 }
3713
  // This table should be tablegen'd once we have a GISel-native tablegen
  // selector.
3716
3717 static const unsigned OpcTable[4][4][9] = {
3718 {
3719 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3720 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3721 0 /* invalid */},
3722 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3723 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3724 0 /* invalid */},
3725 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
3726 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
3727 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
3728 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
3729 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
3730 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
3731 },
3732 {
3733 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3734 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3735 0 /* invalid */},
3736 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
3737 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
3738 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
3739 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
3740 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
3741 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
3742 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3743 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3744 0 /* invalid */}
3745 },
3746 {
3747 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
3748 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
3749 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
3750 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
3751 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
3752 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
3753 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3754 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3755 0 /* invalid */},
3756 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3757 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3758 0 /* invalid */}
3759 },
3760 {
3761 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
3762 AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
3763 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
3764 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3765 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3766 0 /* invalid */},
3767 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3768 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3769 0 /* invalid */},
3770 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3771 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3772 0 /* invalid */}
3773 },
3774 };
3775 unsigned EltIdx = Log2_32(SrcEltSize / 8);
3776 unsigned NumEltsIdx = Log2_32(NumElts / 2);
3777 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
3778 if (!Opc) {
    LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode\n");
3780 return false;
3781 }
3782
3783 const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3784 const TargetRegisterClass *SrcRC =
3785 getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
3786 if (!SrcRC) {
3787 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3788 return false;
3789 }
3790
3791 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
3792 if (SrcTy.getSizeInBits() == 128)
3793 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
3794
3795 if (SwapOperands)
3796 std::swap(SrcReg, Src2Reg);
3797
3798 auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
3799 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3800
3801 // Invert if we had a 'ne' cc.
3802 if (NotOpc) {
3803 Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
3804 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3805 } else {
3806 MIB.buildCopy(DstReg, Cmp.getReg(0));
3807 }
3808 RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
3809 I.eraseFromParent();
3810 return true;
3811 }
3812
MachineInstr *AArch64InstructionSelector::emitScalarToVector(
    unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
    MachineIRBuilder &MIRBuilder) const {
3816 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3817
3818 auto BuildFn = [&](unsigned SubregIndex) {
3819 auto Ins =
3820 MIRBuilder
3821 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3822 .addImm(SubregIndex);
3823 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3824 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3825 return &*Ins;
3826 };
3827
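  // E.g. for a 32-bit scalar moved into an FPR128 class this builds
  // (with illustrative vregs):
  //   %undef:fpr128 = IMPLICIT_DEF
  //   %vec:fpr128 = INSERT_SUBREG %undef, %scalar, %subreg.ssub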
3828 switch (EltSize) {
3829 case 16:
3830 return BuildFn(AArch64::hsub);
3831 case 32:
3832 return BuildFn(AArch64::ssub);
3833 case 64:
3834 return BuildFn(AArch64::dsub);
3835 default:
3836 return nullptr;
3837 }
3838 }
3839
bool AArch64InstructionSelector::selectMergeValues(
    MachineInstr &I, MachineRegisterInfo &MRI) {
3842 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3843 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3844 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3845 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3846 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3847
3848 if (I.getNumOperands() != 3)
3849 return false;
3850
3851 // Merging 2 s64s into an s128.
3852 if (DstTy == LLT::scalar(128)) {
3853 if (SrcTy.getSizeInBits() != 64)
3854 return false;
3855 Register DstReg = I.getOperand(0).getReg();
3856 Register Src1Reg = I.getOperand(1).getReg();
3857 Register Src2Reg = I.getOperand(2).getReg();
3858 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3859 MachineInstr *InsMI =
3860 emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
3861 if (!InsMI)
3862 return false;
3863 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3864 Src2Reg, /* LaneIdx */ 1, RB, MIB);
3865 if (!Ins2MI)
3866 return false;
3867 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3868 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
3869 I.eraseFromParent();
3870 return true;
3871 }
3872
3873 if (RB.getID() != AArch64::GPRRegBankID)
3874 return false;
3875
3876 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3877 return false;
3878
3879 auto *DstRC = &AArch64::GPR64RegClass;
3880 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3881 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3882 TII.get(TargetOpcode::SUBREG_TO_REG))
3883 .addDef(SubToRegDef)
3884 .addImm(0)
3885 .addUse(I.getOperand(1).getReg())
3886 .addImm(AArch64::sub_32);
3887 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
3888 // Need to anyext the second scalar before we can use bfm
3889 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3890 TII.get(TargetOpcode::SUBREG_TO_REG))
3891 .addDef(SubToRegDef2)
3892 .addImm(0)
3893 .addUse(I.getOperand(2).getReg())
3894 .addImm(AArch64::sub_32);
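  // BFMXri with immr=32, imms=31 is the BFI Xd, Xn, #32, #32 alias: it keeps
  // the low word of the first scalar and inserts the low word of the second
  // scalar into bits [63:32].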
3895 MachineInstr &BFM =
3896 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3897 .addDef(I.getOperand(0).getReg())
3898 .addUse(SubToRegDef)
3899 .addUse(SubToRegDef2)
3900 .addImm(32)
3901 .addImm(31);
3902 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3903 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3904 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
3905 I.eraseFromParent();
3906 return true;
3907 }
3908
static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
                              const unsigned EltSize) {
3911 // Choose a lane copy opcode and subregister based off of the size of the
3912 // vector's elements.
3913 switch (EltSize) {
3914 case 8:
3915 CopyOpc = AArch64::CPYi8;
3916 ExtractSubReg = AArch64::bsub;
3917 break;
3918 case 16:
3919 CopyOpc = AArch64::CPYi16;
3920 ExtractSubReg = AArch64::hsub;
3921 break;
3922 case 32:
3923 CopyOpc = AArch64::CPYi32;
3924 ExtractSubReg = AArch64::ssub;
3925 break;
3926 case 64:
3927 CopyOpc = AArch64::CPYi64;
3928 ExtractSubReg = AArch64::dsub;
3929 break;
3930 default:
3931 // Unknown size, bail out.
3932 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
3933 return false;
3934 }
3935 return true;
3936 }
3937
MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
    Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
    Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3941 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3942 unsigned CopyOpc = 0;
3943 unsigned ExtractSubReg = 0;
3944 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
3945 LLVM_DEBUG(
3946 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
3947 return nullptr;
3948 }
3949
3950 const TargetRegisterClass *DstRC =
3951 getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
3952 if (!DstRC) {
3953 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
3954 return nullptr;
3955 }
3956
3957 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
3958 const LLT &VecTy = MRI.getType(VecReg);
3959 const TargetRegisterClass *VecRC =
3960 getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
3961 if (!VecRC) {
3962 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3963 return nullptr;
3964 }
3965
3966 // The register that we're going to copy into.
3967 Register InsertReg = VecReg;
3968 if (!DstReg)
3969 DstReg = MRI.createVirtualRegister(DstRC);
3970 // If the lane index is 0, we just use a subregister COPY.
3971 if (LaneIdx == 0) {
3972 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
3973 .addReg(VecReg, 0, ExtractSubReg);
3974 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3975 return &*Copy;
3976 }
3977
3978 // Lane copies require 128-bit wide registers. If we're dealing with an
3979 // unpacked vector, then we need to move up to that width. Insert an implicit
3980 // def and a subregister insert to get us there.
3981 if (VecTy.getSizeInBits() != 128) {
3982 MachineInstr *ScalarToVector = emitScalarToVector(
3983 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
3984 if (!ScalarToVector)
3985 return nullptr;
3986 InsertReg = ScalarToVector->getOperand(0).getReg();
3987 }
3988
3989 MachineInstr *LaneCopyMI =
3990 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
3991 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
3992
3993 // Make sure that we actually constrain the initial copy.
3994 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3995 return LaneCopyMI;
3996 }
3997
bool AArch64InstructionSelector::selectExtractElt(
    MachineInstr &I, MachineRegisterInfo &MRI) {
4000 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
4001 "unexpected opcode!");
4002 Register DstReg = I.getOperand(0).getReg();
4003 const LLT NarrowTy = MRI.getType(DstReg);
4004 const Register SrcReg = I.getOperand(1).getReg();
4005 const LLT WideTy = MRI.getType(SrcReg);
4006 (void)WideTy;
4007 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
4008 "source register size too small!");
4009 assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
4010
4011 // Need the lane index to determine the correct copy opcode.
4012 MachineOperand &LaneIdxOp = I.getOperand(2);
4013 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
4014
4015 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4016 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
4017 return false;
4018 }
4019
4020 // Find the index to extract from.
4021 auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
4022 if (!VRegAndVal)
4023 return false;
4024 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4025
4027 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
4028 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
4029 LaneIdx, MIB);
4030 if (!Extract)
4031 return false;
4032
4033 I.eraseFromParent();
4034 return true;
4035 }
4036
bool AArch64InstructionSelector::selectSplitVectorUnmerge(
    MachineInstr &I, MachineRegisterInfo &MRI) {
4039 unsigned NumElts = I.getNumOperands() - 1;
4040 Register SrcReg = I.getOperand(NumElts).getReg();
4041 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4042 const LLT SrcTy = MRI.getType(SrcReg);
4043
4044 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
4045 if (SrcTy.getSizeInBits() > 128) {
    LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge\n");
4047 return false;
4048 }
4049
4050 // We implement a split vector operation by treating the sub-vectors as
4051 // scalars and extracting them.
4052 const RegisterBank &DstRB =
4053 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
4054 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4055 Register Dst = I.getOperand(OpIdx).getReg();
4056 MachineInstr *Extract =
4057 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4058 if (!Extract)
4059 return false;
4060 }
4061 I.eraseFromParent();
4062 return true;
4063 }
4064
bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
                                                     MachineRegisterInfo &MRI) {
4067 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4068 "unexpected opcode");
4069
4070 // TODO: Handle unmerging into GPRs and from scalars to scalars.
4071 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
4072 AArch64::FPRRegBankID ||
4073 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
4074 AArch64::FPRRegBankID) {
4075 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
4076 "currently unsupported.\n");
4077 return false;
4078 }
4079
4080 // The last operand is the vector source register, and every other operand is
4081 // a register to unpack into.
4082 unsigned NumElts = I.getNumOperands() - 1;
4083 Register SrcReg = I.getOperand(NumElts).getReg();
4084 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4085 const LLT WideTy = MRI.getType(SrcReg);
4086 (void)WideTy;
4087 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
4088 "can only unmerge from vector or s128 types!");
4089 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
4090 "source register size too small!");
4091
4092 if (!NarrowTy.isScalar())
4093 return selectSplitVectorUnmerge(I, MRI);
4094
4095 // Choose a lane copy opcode and subregister based off of the size of the
4096 // vector's elements.
4097 unsigned CopyOpc = 0;
4098 unsigned ExtractSubReg = 0;
4099 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
4100 return false;
4101
4102 // Set up for the lane copies.
4103 MachineBasicBlock &MBB = *I.getParent();
4104
4105 // Stores the registers we'll be copying from.
4106 SmallVector<Register, 4> InsertRegs;
4107
4108 // We'll use the first register twice, so we only need NumElts-1 registers.
4109 unsigned NumInsertRegs = NumElts - 1;
4110
4111 // If our elements fit into exactly 128 bits, then we can copy from the source
4112 // directly. Otherwise, we need to do a bit of setup with some subregister
4113 // inserts.
4114 if (NarrowTy.getSizeInBits() * NumElts == 128) {
4115 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
4116 } else {
4117 // No. We have to perform subregister inserts. For each insert, create an
4118 // implicit def and a subregister insert, and save the register we create.
4119 const TargetRegisterClass *RC =
4120 getMinClassForRegBank(*RBI.getRegBank(SrcReg, MRI, TRI),
4121 WideTy.getScalarSizeInBits() * NumElts);
4122 unsigned SubReg = 0;
4123 bool Found = getSubRegForClass(RC, TRI, SubReg);
4124 (void)Found;
    assert(Found && "expected to find last operand's subreg idx");
4126 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4127 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4128 MachineInstr &ImpDefMI =
4129 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4130 ImpDefReg);
4131
4132 // Now, create the subregister insert from SrcReg.
4133 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4134 MachineInstr &InsMI =
4135 *BuildMI(MBB, I, I.getDebugLoc(),
4136 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4137 .addUse(ImpDefReg)
4138 .addUse(SrcReg)
4139 .addImm(SubReg);
4140
4141 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4142 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
4143
4144 // Save the register so that we can copy from it after.
4145 InsertRegs.push_back(InsertReg);
4146 }
4147 }
4148
4149 // Now that we've created any necessary subregister inserts, we can
4150 // create the copies.
4151 //
4152 // Perform the first copy separately as a subregister copy.
4153 Register CopyTo = I.getOperand(0).getReg();
4154 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4155 .addReg(InsertRegs[0], 0, ExtractSubReg);
4156 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4157
4158 // Now, perform the remaining copies as vector lane copies.
4159 unsigned LaneIdx = 1;
4160 for (Register InsReg : InsertRegs) {
4161 Register CopyTo = I.getOperand(LaneIdx).getReg();
4162 MachineInstr &CopyInst =
4163 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4164 .addUse(InsReg)
4165 .addImm(LaneIdx);
4166 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4167 ++LaneIdx;
4168 }
4169
4170 // Separately constrain the first copy's destination. Because of the
4171 // limitation in constrainOperandRegClass, we can't guarantee that this will
4172 // actually be constrained. So, do it ourselves using the second operand.
4173 const TargetRegisterClass *RC =
4174 MRI.getRegClassOrNull(I.getOperand(1).getReg());
4175 if (!RC) {
4176 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4177 return false;
4178 }
4179
4180 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4181 I.eraseFromParent();
4182 return true;
4183 }
4184
bool AArch64InstructionSelector::selectConcatVectors(
    MachineInstr &I, MachineRegisterInfo &MRI) {
4187 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4188 "Unexpected opcode");
4189 Register Dst = I.getOperand(0).getReg();
4190 Register Op1 = I.getOperand(1).getReg();
4191 Register Op2 = I.getOperand(2).getReg();
4192 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4193 if (!ConcatMI)
4194 return false;
4195 I.eraseFromParent();
4196 return true;
4197 }
4198
4199 unsigned
AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
                                                  MachineFunction &MF) const {
4202 Type *CPTy = CPVal->getType();
4203 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
4204
4205 MachineConstantPool *MCP = MF.getConstantPool();
4206 return MCP->getConstantPoolIndex(CPVal, Alignment);
4207 }
4208
MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
    const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4211 auto &MF = MIRBuilder.getMF();
4212 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4213
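  // The address is formed in two halves: ADRP materializes the 4KB page of
  // the constant-pool entry, and the low 12 bits are folded into the load's
  // scaled immediate below via MO_PAGEOFF | MO_NC.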
4214 auto Adrp =
4215 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4216 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
4217
4218 MachineInstr *LoadMI = nullptr;
4219 MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4220 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
4221 switch (Size) {
4222 case 16:
4223 LoadMI =
4224 &*MIRBuilder
4225 .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
4226 .addConstantPoolIndex(CPIdx, 0,
4227 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4228 break;
4229 case 8:
4230 LoadMI =
4231 &*MIRBuilder
4232 .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
4233 .addConstantPoolIndex(CPIdx, 0,
4234 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4235 break;
4236 case 4:
4237 LoadMI =
4238 &*MIRBuilder
4239 .buildInstr(AArch64::LDRSui, {&AArch64::FPR32RegClass}, {Adrp})
4240 .addConstantPoolIndex(CPIdx, 0,
4241 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4242 break;
4243 case 2:
4244 LoadMI =
4245 &*MIRBuilder
4246 .buildInstr(AArch64::LDRHui, {&AArch64::FPR16RegClass}, {Adrp})
4247 .addConstantPoolIndex(CPIdx, 0,
4248 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4249 break;
4250 default:
    LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
                      << *CPVal->getType() << '\n');
4253 return nullptr;
4254 }
4255 LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
4256 MachineMemOperand::MOLoad,
4257 Size, Align(Size)));
4258 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
4259 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
4260 return LoadMI;
4261 }
4262
/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
4264 /// size and RB.
4265 static std::pair<unsigned, unsigned>
getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4267 unsigned Opc, SubregIdx;
4268 if (RB.getID() == AArch64::GPRRegBankID) {
4269 if (EltSize == 16) {
4270 Opc = AArch64::INSvi16gpr;
4271 SubregIdx = AArch64::ssub;
4272 } else if (EltSize == 32) {
4273 Opc = AArch64::INSvi32gpr;
4274 SubregIdx = AArch64::ssub;
4275 } else if (EltSize == 64) {
4276 Opc = AArch64::INSvi64gpr;
4277 SubregIdx = AArch64::dsub;
4278 } else {
4279 llvm_unreachable("invalid elt size!");
4280 }
4281 } else {
4282 if (EltSize == 8) {
4283 Opc = AArch64::INSvi8lane;
4284 SubregIdx = AArch64::bsub;
4285 } else if (EltSize == 16) {
4286 Opc = AArch64::INSvi16lane;
4287 SubregIdx = AArch64::hsub;
4288 } else if (EltSize == 32) {
4289 Opc = AArch64::INSvi32lane;
4290 SubregIdx = AArch64::ssub;
4291 } else if (EltSize == 64) {
4292 Opc = AArch64::INSvi64lane;
4293 SubregIdx = AArch64::dsub;
4294 } else {
4295 llvm_unreachable("invalid elt size!");
4296 }
4297 }
4298 return std::make_pair(Opc, SubregIdx);
4299 }
4300
MachineInstr *AArch64InstructionSelector::emitInstr(
    unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
    std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
    const ComplexRendererFns &RenderFns) const {
4305 assert(Opcode && "Expected an opcode?");
4306 assert(!isPreISelGenericOpcode(Opcode) &&
4307 "Function should only be used to produce selected instructions!");
4308 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4309 if (RenderFns)
4310 for (auto &Fn : *RenderFns)
4311 Fn(MI);
4312 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4313 return &*MI;
4314 }
4315
MachineInstr *AArch64InstructionSelector::emitAddSub(
    const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
    Register Dst, MachineOperand &LHS, MachineOperand &RHS,
    MachineIRBuilder &MIRBuilder) const {
4320 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4321 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4322 auto Ty = MRI.getType(LHS.getReg());
4323 assert(!Ty.isVector() && "Expected a scalar or pointer?");
4324 unsigned Size = Ty.getSizeInBits();
4325 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4326 bool Is32Bit = Size == 32;
4327
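  // Try the addressing-mode forms from most to least profitable: immediate,
  // negated immediate, extended register, shifted register, and finally the
  // plain register-register form.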
4328 // INSTRri form with positive arithmetic immediate.
4329 if (auto Fns = selectArithImmed(RHS))
4330 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4331 MIRBuilder, Fns);
4332
4333 // INSTRri form with negative arithmetic immediate.
4334 if (auto Fns = selectNegArithImmed(RHS))
4335 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4336 MIRBuilder, Fns);
4337
4338 // INSTRrx form.
4339 if (auto Fns = selectArithExtendedRegister(RHS))
4340 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4341 MIRBuilder, Fns);
4342
4343 // INSTRrs form.
4344 if (auto Fns = selectShiftedRegister(RHS))
4345 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4346 MIRBuilder, Fns);
4347 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4348 MIRBuilder);
4349 }
4350
4351 MachineInstr *
AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
                                    MachineOperand &RHS,
                                    MachineIRBuilder &MIRBuilder) const {
4355 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4356 {{AArch64::ADDXri, AArch64::ADDWri},
4357 {AArch64::ADDXrs, AArch64::ADDWrs},
4358 {AArch64::ADDXrr, AArch64::ADDWrr},
4359 {AArch64::SUBXri, AArch64::SUBWri},
4360 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4361 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4362 }
4363
4364 MachineInstr *
AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
                                     MachineOperand &RHS,
                                     MachineIRBuilder &MIRBuilder) const {
4368 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4369 {{AArch64::ADDSXri, AArch64::ADDSWri},
4370 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4371 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4372 {AArch64::SUBSXri, AArch64::SUBSWri},
4373 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4374 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4375 }
4376
4377 MachineInstr *
AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
                                     MachineOperand &RHS,
                                     MachineIRBuilder &MIRBuilder) const {
4381 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4382 {{AArch64::SUBSXri, AArch64::SUBSWri},
4383 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4384 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4385 {AArch64::ADDSXri, AArch64::ADDSWri},
4386 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4387 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4388 }
4389
4390 MachineInstr *
AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
                                    MachineIRBuilder &MIRBuilder) const {
4393 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4394 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4395 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
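  // CMN is ADDS with the result discarded: emit the ADDS into a scratch vreg
  // and use only the NZCV flags it sets.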
4396 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4397 }
4398
4399 MachineInstr *
AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
                                    MachineIRBuilder &MIRBuilder) const {
4402 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4403 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4404 LLT Ty = MRI.getType(LHS.getReg());
4405 unsigned RegSize = Ty.getSizeInBits();
4406 bool Is32Bit = (RegSize == 32);
4407 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4408 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4409 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
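  // Rows: immediate, shifted register, plain register; column 1 holds the
  // 32-bit variants.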
4410 // ANDS needs a logical immediate for its immediate form. Check if we can
4411 // fold one in.
4412 if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4413 int64_t Imm = ValAndVReg->Value.getSExtValue();
4414
4415 if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
4416 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4417 TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
4418 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4419 return &*TstMI;
4420 }
4421 }
4422
4423 if (auto Fns = selectLogicalShiftedRegister(RHS))
4424 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4425 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4426 }
4427
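/// Emit a flag-setting integer comparison of LHS and RHS, preferring a CMN
/// or TST fold over a plain SUBS when tryFoldIntegerCompare succeeds.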
MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
  assert(Predicate.isPredicate() && "Expected predicate?");
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  LLT CmpTy = MRI.getType(LHS.getReg());
  assert(!CmpTy.isVector() && "Expected scalar or pointer");
  unsigned Size = CmpTy.getSizeInBits();
  (void)Size;
  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
  // Fold the compare into a cmn or tst if possible.
  if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
    return FoldCmp;
  auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
  return emitSUBS(Dst, LHS, RHS, MIRBuilder);
}

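/// Emit a CSet (a CSINC of WZR) materializing the boolean result of a
/// floating point compare with predicate Pred into Dst. Predicates that
/// need two condition codes are emitted as two csets ORed together:
///
///   cset wA, <cc1>
///   cset wB, <cc2>
///   orr  wDst, wA, wB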
MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
    Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
#ifndef NDEBUG
  LLT Ty = MRI.getType(Dst);
  assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
         "Expected a 32-bit scalar register?");
#endif
  const Register ZeroReg = AArch64::WZR;
  auto EmitCSet = [&](Register CsetDst, AArch64CC::CondCode CC) {
    auto CSet =
        MIRBuilder.buildInstr(AArch64::CSINCWr, {CsetDst}, {ZeroReg, ZeroReg})
            .addImm(getInvertedCondCode(CC));
    constrainSelectedInstRegOperands(*CSet, TII, TRI, RBI);
    return &*CSet;
  };

  AArch64CC::CondCode CC1, CC2;
  changeFCMPPredToAArch64CC(Pred, CC1, CC2);
  if (CC2 == AArch64CC::AL)
    return EmitCSet(Dst, CC1);

  const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
  Register Def1Reg = MRI.createVirtualRegister(RC);
  Register Def2Reg = MRI.createVirtualRegister(RC);
  EmitCSet(Def1Reg, CC1);
  EmitCSet(Def2Reg, CC2);
  auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
  constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
  return &*OrMI;
}

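/// Emit a floating point comparison of LHS and RHS, using the immediate form
/// (a compare against #0.0) when the constant is a positive zero. Returns
/// nullptr for unsupported (vector or non-32/64-bit) types.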
MachineInstr *
AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS,
                                          MachineIRBuilder &MIRBuilder,
                                          Optional<CmpInst::Predicate> Pred) const {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  LLT Ty = MRI.getType(LHS);
  if (Ty.isVector())
    return nullptr;
  unsigned OpSize = Ty.getSizeInBits();
  if (OpSize != 32 && OpSize != 64)
    return nullptr;

  // If this is a compare against +0.0, then we don't have
  // to explicitly materialize a constant.
  const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
  bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());

  auto IsEqualityPred = [](CmpInst::Predicate P) {
    return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
           P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
  };
  if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
    // Try commutating the operands.
    const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
    if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
      ShouldUseImm = true;
      std::swap(LHS, RHS);
    }
  }
  unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
                              {AArch64::FCMPSri, AArch64::FCMPDri}};
  unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64];

  // Partially build the compare. Decide if we need to add a use for the
  // third operand based off whether or not we're comparing against 0.0.
  auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
  if (!ShouldUseImm)
    CmpMI.addUse(RHS);
  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
  return &*CmpMI;
}

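/// Emit a concatenation of two 64-bit vectors into a 128-bit result: each
/// source is widened with scalar_to_vector, then the second is inserted into
/// the upper lane of the first.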
MachineInstr *AArch64InstructionSelector::emitVectorConcat(
    Optional<Register> Dst, Register Op1, Register Op2,
    MachineIRBuilder &MIRBuilder) const {
  // We implement a vector concat by:
  // 1. Use scalar_to_vector to insert the lower vector into the larger dest
  // 2. Insert the upper vector into the destination's upper element
  // TODO: some of this code is common with G_BUILD_VECTOR handling.
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();

  const LLT Op1Ty = MRI.getType(Op1);
  const LLT Op2Ty = MRI.getType(Op2);

  if (Op1Ty != Op2Ty) {
    LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
    return nullptr;
  }
  assert(Op1Ty.isVector() && "Expected a vector for vector concat");

  if (Op1Ty.getSizeInBits() >= 128) {
    LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
    return nullptr;
  }

  // At the moment we just support 64 bit vector concats.
  if (Op1Ty.getSizeInBits() != 64) {
    LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
    return nullptr;
  }

  const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
  const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
  const TargetRegisterClass *DstRC =
      getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);

  MachineInstr *WidenedOp1 =
      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
  MachineInstr *WidenedOp2 =
      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
  if (!WidenedOp1 || !WidenedOp2) {
    LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
    return nullptr;
  }

  // Now do the insert of the upper element.
  unsigned InsertOpc, InsSubRegIdx;
  std::tie(InsertOpc, InsSubRegIdx) =
      getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());

  if (!Dst)
    Dst = MRI.createVirtualRegister(DstRC);
  auto InsElt =
      MIRBuilder
          .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
          .addImm(1) /* Lane index */
          .addUse(WidenedOp2->getOperand(0).getReg())
          .addImm(0);
  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
  return &*InsElt;
}

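/// Materialize the boolean result of an integer compare into DefReg with a
/// CSINC on the inverted condition.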
MachineInstr *
AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
                                            MachineIRBuilder &MIRBuilder,
                                            Register SrcReg) const {
  // CSINC increments the result when the predicate is false. Invert it.
  const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
      CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
  auto I = MIRBuilder.buildInstr(AArch64::CSINCWr, {DefReg}, {SrcReg, SrcReg})
               .addImm(InvCC);
  constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
  return &*I;
}

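/// Emit the flag-setting add/sub for an overflow opcode and return it along
/// with the condition code that signals the overflow.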
std::pair<MachineInstr *, AArch64CC::CondCode>
AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
                                           MachineOperand &LHS,
                                           MachineOperand &RHS,
                                           MachineIRBuilder &MIRBuilder) const {
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode!");
  case TargetOpcode::G_SADDO:
    return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
  case TargetOpcode::G_UADDO:
    return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
  case TargetOpcode::G_SSUBO:
    return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
  case TargetOpcode::G_USUBO:
    return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
  }
}

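/// Attempt to fold the compare defining a G_SELECT's condition directly into
/// a CSEL/FCSEL, provided the compare's result is not meaningfully used by
/// anything other than selects.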
bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) {
  MachineRegisterInfo &MRI = *MIB.getMRI();
  // We want to recognize this pattern:
  //
  // $z = G_FCMP pred, $x, $y
  // ...
  // $w = G_SELECT $z, $a, $b
  //
  // Where the value of $z is *only* ever used by the G_SELECT (possibly with
  // some copies/truncs in between).
  //
  // If we see this, then we can emit something like this:
  //
  // fcmp $x, $y
  // fcsel $w, $a, $b, pred
  //
  // Rather than emitting both of the rather long sequences in the standard
  // G_FCMP/G_SELECT select methods.

  // First, check if the condition is defined by a compare.
  MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
  while (CondDef) {
    // We can only fold if all of the defs have one use.
    Register CondDefReg = CondDef->getOperand(0).getReg();
    if (!MRI.hasOneNonDBGUse(CondDefReg)) {
      // Unless it's another select.
      for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
        if (CondDef == &UI)
          continue;
        if (UI.getOpcode() != TargetOpcode::G_SELECT)
          return false;
      }
    }

    // We can skip over G_TRUNC since the condition is 1-bit.
    // Truncating/extending can have no impact on the value.
    unsigned Opc = CondDef->getOpcode();
    if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
      break;

    // Can't see past copies from physregs.
    if (Opc == TargetOpcode::COPY &&
        Register::isPhysicalRegister(CondDef->getOperand(1).getReg()))
      return false;

    CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
  }

  // Is the condition defined by a compare?
  if (!CondDef)
    return false;

  unsigned CondOpc = CondDef->getOpcode();
  if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
    return false;

  AArch64CC::CondCode CondCode;
  if (CondOpc == TargetOpcode::G_ICMP) {
    auto Pred =
        static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
    CondCode = changeICMPPredToAArch64CC(Pred);
    emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
                       CondDef->getOperand(1), MIB);
  } else {
    // Get the condition code for the select.
    auto Pred =
        static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
    AArch64CC::CondCode CondCode2;
    changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);

    // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
    // instructions to emit the comparison.
    // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
    // unnecessary.
    if (CondCode2 != AArch64CC::AL)
      return false;

    if (!emitFPCompare(CondDef->getOperand(2).getReg(),
                       CondDef->getOperand(3).getReg(), MIB)) {
      LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
      return false;
    }
  }

  // Emit the select.
  emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
             I.getOperand(3).getReg(), CondCode, MIB);
  I.eraseFromParent();
  return true;
}

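/// Try to replace an integer compare with a cheaper flag-setting instruction:
/// CMN for a compare against a negation, TST for a signed compare of an AND
/// against zero. Returns nullptr if no fold applies.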
MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
         "Unexpected MachineOperand");
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  // We want to find this sort of thing:
  // x = G_SUB 0, y
  // G_ICMP z, x
  //
  // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
  // e.g:
  //
  // cmn z, y

  // Check if the RHS or LHS of the G_ICMP is defined by a SUB
  MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
  MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
  auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
  // Given this:
  //
  // x = G_SUB 0, y
  // G_ICMP x, z
  //
  // Produce this:
  //
  // cmn y, z
  if (isCMN(LHSDef, P, MRI))
    return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);

  // Same idea here, but with the RHS of the compare instead:
  //
  // Given this:
  //
  // x = G_SUB 0, y
  // G_ICMP z, x
  //
  // Produce this:
  //
  // cmn z, y
  if (isCMN(RHSDef, P, MRI))
    return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);

  // Given this:
  //
  // z = G_AND x, y
  // G_ICMP z, 0
  //
  // Produce this if the compare is signed:
  //
  // tst x, y
  if (!CmpInst::isUnsigned(P) && LHSDef &&
      LHSDef->getOpcode() == TargetOpcode::G_AND) {
    // Make sure that the RHS is 0.
    auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
    if (!ValAndVReg || ValAndVReg->Value != 0)
      return nullptr;

    return emitTST(LHSDef->getOperand(1), LHSDef->getOperand(2), MIRBuilder);
  }

  return nullptr;
}

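/// Select a G_SHUFFLE_VECTOR by materializing the shuffle mask as a
/// byte-index vector in the constant pool and emitting a TBL1/TBL2 lookup.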
bool AArch64InstructionSelector::selectShuffleVector(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  Register Src1Reg = I.getOperand(1).getReg();
  const LLT Src1Ty = MRI.getType(Src1Reg);
  Register Src2Reg = I.getOperand(2).getReg();
  const LLT Src2Ty = MRI.getType(Src2Reg);
  ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();

  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  LLVMContext &Ctx = MF.getFunction().getContext();

  // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
  // it's originated from a <1 x T> type. Those should have been lowered into
  // G_BUILD_VECTOR earlier.
  if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
    LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
    return false;
  }

  unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;

  SmallVector<Constant *, 64> CstIdxs;
  for (int Val : Mask) {
    // For now, any undef indexes we'll just assume to be 0. This should be
    // optimized in future, e.g. to select DUP etc.
    Val = Val < 0 ? 0 : Val;
    for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
      unsigned Offset = Byte + Val * BytesPerElt;
      CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
    }
  }

  // Use a constant pool to load the index vector for TBL.
  Constant *CPVal = ConstantVector::get(CstIdxs);
  MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB);
  if (!IndexLoad) {
    LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
    return false;
  }

  if (DstTy.getSizeInBits() != 128) {
    assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
    // This case can be done with TBL1.
    MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIB);
    if (!Concat) {
      LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
      return false;
    }

    // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
    IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
                                   IndexLoad->getOperand(0).getReg(), MIB);

    auto TBL1 = MIB.buildInstr(
        AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
        {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
    constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);

    auto Copy =
        MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
            .addReg(TBL1.getReg(0), 0, AArch64::dsub);
    RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
    I.eraseFromParent();
    return true;
  }

  // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
  // Q registers for regalloc.
  SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
  auto RegSeq = createQTuple(Regs, MIB);
  auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
                             {RegSeq, IndexLoad->getOperand(0)});
  constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

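/// Emit an instruction inserting EltReg into lane LaneIdx of SrcReg, creating
/// a fresh FPR128 destination if DstReg is not provided.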
MachineInstr *AArch64InstructionSelector::emitLaneInsert(
    Optional<Register> DstReg, Register SrcReg, Register EltReg,
    unsigned LaneIdx, const RegisterBank &RB,
    MachineIRBuilder &MIRBuilder) const {
  MachineInstr *InsElt = nullptr;
  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();

  // Create a register to define with the insert if one wasn't passed in.
  if (!DstReg)
    DstReg = MRI.createVirtualRegister(DstRC);

  unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
  unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;

  if (RB.getID() == AArch64::FPRRegBankID) {
    auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
    InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
                 .addImm(LaneIdx)
                 .addUse(InsSub->getOperand(0).getReg())
                 .addImm(0);
  } else {
    InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
                 .addImm(LaneIdx)
                 .addUse(EltReg);
  }

  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
  return InsElt;
}

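/// Select a G_SEXT/G_ZEXT/G_ANYEXT of a G_EXTRACT_VECTOR_ELT with a constant
/// lane index as a single SMOV/UMOV lane move.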
bool AArch64InstructionSelector::selectUSMovFromExtend(
    MachineInstr &MI, MachineRegisterInfo &MRI) {
  if (MI.getOpcode() != TargetOpcode::G_SEXT &&
      MI.getOpcode() != TargetOpcode::G_ZEXT &&
      MI.getOpcode() != TargetOpcode::G_ANYEXT)
    return false;
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
  const Register DefReg = MI.getOperand(0).getReg();
  const LLT DstTy = MRI.getType(DefReg);
  unsigned DstSize = DstTy.getSizeInBits();

  if (DstSize != 32 && DstSize != 64)
    return false;

  MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
                                       MI.getOperand(1).getReg(), MRI);
  int64_t Lane;
  if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))
    return false;
  Register Src0 = Extract->getOperand(1).getReg();

  const LLT &VecTy = MRI.getType(Src0);

  if (VecTy.getSizeInBits() != 128) {
    const MachineInstr *ScalarToVector = emitScalarToVector(
        VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
    assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
    Src0 = ScalarToVector->getOperand(0).getReg();
  }

  unsigned Opcode;
  if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
    Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
  else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
    Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
  else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
    Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
  else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
    Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
  else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
    Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
  else
    llvm_unreachable("Unexpected type combo for S/UMov!");

  // We may need to generate one of these, depending on the type and sign of
  // the input:
  //  DstReg = SMOV Src0, Lane;
  //  NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
  MachineInstr *ExtI = nullptr;
  if (DstSize == 64 && !IsSigned) {
    Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
    MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
    ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
               .addImm(0)
               .addUse(NewReg)
               .addImm(AArch64::sub_32);
    RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
  } else
    ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);

  constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  MI.eraseFromParent();
  return true;
}

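/// Select a G_INSERT_VECTOR_ELT with a constant lane index, widening
/// sub-128-bit vectors to 128 bits for the insert and demoting the result
/// back afterwards.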
bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I,
                                                 MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);

  // Get information on the destination.
  Register DstReg = I.getOperand(0).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  unsigned VecSize = DstTy.getSizeInBits();

  // Get information on the element we want to insert into the destination.
  Register EltReg = I.getOperand(2).getReg();
  const LLT EltTy = MRI.getType(EltReg);
  unsigned EltSize = EltTy.getSizeInBits();
  if (EltSize < 16 || EltSize > 64)
    return false; // Don't support all element types yet.

  // Find the definition of the index. Bail out if it's not defined by a
  // G_CONSTANT.
  Register IdxReg = I.getOperand(3).getReg();
  auto VRegAndVal = getIConstantVRegValWithLookThrough(IdxReg, MRI);
  if (!VRegAndVal)
    return false;
  unsigned LaneIdx = VRegAndVal->Value.getSExtValue();

  // Perform the lane insert.
  Register SrcReg = I.getOperand(1).getReg();
  const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);

  if (VecSize < 128) {
    // If the vector we're inserting into is smaller than 128 bits, widen it
    // to 128 to do the insert.
    MachineInstr *ScalarToVec =
        emitScalarToVector(VecSize, &AArch64::FPR128RegClass, SrcReg, MIB);
    if (!ScalarToVec)
      return false;
    SrcReg = ScalarToVec->getOperand(0).getReg();
  }

  // Create an insert into a new FPR128 register.
  // Note that if our vector is already 128 bits, we end up emitting an extra
  // register.
  MachineInstr *InsMI =
      emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIB);

  if (VecSize < 128) {
    // If we had to widen to perform the insert, then we have to demote back to
    // the original size to get the result we want.
    Register DemoteVec = InsMI->getOperand(0).getReg();
    const TargetRegisterClass *RC =
        getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
    if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
      LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
      return false;
    }
    unsigned SubReg = 0;
    if (!getSubRegForClass(RC, TRI, SubReg))
      return false;
    if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
      LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
                        << ")\n");
      return false;
    }
    MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
        .addReg(DemoteVec, 0, SubReg);
    RBI.constrainGenericRegister(DstReg, *RC, MRI);
  } else {
    // No widening needed.
    InsMI->getOperand(0).setReg(DstReg);
    constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
  }

  I.eraseFromParent();
  return true;
}

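/// Materialize the constant vector CV into Dst, using a zero MOVI for
/// all-zero constants and a constant pool load otherwise.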
MachineInstr *
AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
                                               MachineIRBuilder &MIRBuilder,
                                               MachineRegisterInfo &MRI) {
  LLT DstTy = MRI.getType(Dst);
  unsigned DstSize = DstTy.getSizeInBits();
  if (CV->isNullValue()) {
    if (DstSize == 128) {
      auto Mov =
          MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
      constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
      return &*Mov;
    }

    if (DstSize == 64) {
      auto Mov =
          MIRBuilder
              .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
              .addImm(0);
      auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
                      .addReg(Mov.getReg(0), 0, AArch64::dsub);
      RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
      return &*Copy;
    }
  }

  auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
  if (!CPLoad) {
    LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
    return nullptr;
  }

  auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
  RBI.constrainGenericRegister(
      Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
  return &*Copy;
}

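/// If every operand of this G_BUILD_VECTOR is a G_CONSTANT or G_FCONSTANT,
/// select it as a constant pool load instead of a lane-insert sequence.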
bool AArch64InstructionSelector::tryOptConstantBuildVec(
    MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
  unsigned DstSize = DstTy.getSizeInBits();
  assert(DstSize <= 128 && "Unexpected build_vec type!");
  if (DstSize < 32)
    return false;
  // Check if we're building a constant vector, in which case we want to
  // generate a constant pool load instead of a vector insert sequence.
  SmallVector<Constant *, 16> Csts;
  for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
    // Try to find G_CONSTANT or G_FCONSTANT
    auto *OpMI =
        getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
    if (OpMI)
      Csts.emplace_back(
          const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
    else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
                                  I.getOperand(Idx).getReg(), MRI)))
      Csts.emplace_back(
          const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
    else
      return false;
  }
  Constant *CV = ConstantVector::get(Csts);
  if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
    return false;
  I.eraseFromParent();
  return true;
}

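/// Try to select a G_BUILD_VECTOR whose trailing operands are all undef as a
/// single SUBREG_TO_REG of the first element.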
bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  // Given:
  //  %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
  //
  // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
  Register Dst = I.getOperand(0).getReg();
  Register EltReg = I.getOperand(1).getReg();
  LLT EltTy = MRI.getType(EltReg);
  // If the index isn't on the same bank as its elements, then this can't be a
  // SUBREG_TO_REG.
  const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
  const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
  if (EltRB != DstRB)
    return false;
  if (any_of(make_range(I.operands_begin() + 2, I.operands_end()),
             [&MRI](const MachineOperand &Op) {
               return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(),
                                    MRI);
             }))
    return false;
  unsigned SubReg;
  const TargetRegisterClass *EltRC =
      getMinClassForRegBank(EltRB, EltTy.getSizeInBits());
  if (!EltRC)
    return false;
  const TargetRegisterClass *DstRC =
      getMinClassForRegBank(DstRB, MRI.getType(Dst).getSizeInBits());
  if (!DstRC)
    return false;
  if (!getSubRegForClass(EltRC, TRI, SubReg))
    return false;
  auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
                         .addImm(0)
                         .addUse(EltReg)
                         .addImm(SubReg);
  I.eraseFromParent();
  constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
  return RBI.constrainGenericRegister(Dst, *DstRC, MRI);
}

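/// Select a G_BUILD_VECTOR as a chain of lane inserts, after the constant
/// and subreg-to-reg fast paths above have been tried.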
bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
                                                   MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
  // Until we port more of the optimized selections, for now just use a vector
  // insert sequence.
  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
  unsigned EltSize = EltTy.getSizeInBits();

  if (tryOptConstantBuildVec(I, DstTy, MRI))
    return true;
  if (tryOptBuildVecToSubregToReg(I, MRI))
    return true;

  if (EltSize < 16 || EltSize > 64)
    return false; // Don't support all element types yet.
  const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);

  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
  MachineInstr *ScalarToVec =
      emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
                         I.getOperand(1).getReg(), MIB);
  if (!ScalarToVec)
    return false;

  Register DstVec = ScalarToVec->getOperand(0).getReg();
  unsigned DstSize = DstTy.getSizeInBits();

  // Keep track of the last MI we inserted. Later on, we might be able to save
  // a copy using it.
  MachineInstr *PrevMI = nullptr;
  for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
    // Note that if we don't do a subregister copy, we can end up making an
    // extra register.
    PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
                              MIB);
    DstVec = PrevMI->getOperand(0).getReg();
  }

  // If DstTy's size in bits is less than 128, then emit a subregister copy
  // from DstVec to the last register we've defined.
  if (DstSize < 128) {
    // Force this to be FPR using the destination vector.
    const TargetRegisterClass *RC =
        getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
    if (!RC)
      return false;
    if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
      LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
      return false;
    }

    unsigned SubReg = 0;
    if (!getSubRegForClass(RC, TRI, SubReg))
      return false;
    if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
      LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
                        << ")\n");
      return false;
    }

    Register Reg = MRI.createVirtualRegister(RC);
    Register DstReg = I.getOperand(0).getReg();

    MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0, SubReg);
    MachineOperand &RegOp = I.getOperand(1);
    RegOp.setReg(Reg);
    RBI.constrainGenericRegister(DstReg, *RC, MRI);
  } else {
    // We don't need a subregister copy. Save a copy by re-using the
    // destination register on the final insert.
    assert(PrevMI && "PrevMI was null?");
    PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
    constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
  }

  I.eraseFromParent();
  return true;
}

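/// Select a NEON structured load intrinsic: emit Opc and copy each of the
/// NumVecs tuple subregisters out to the intrinsic's results.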
bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
                                                           unsigned NumVecs,
                                                           MachineInstr &I) {
  assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
  assert(Opc && "Expected an opcode?");
  assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
  auto &MRI = *MIB.getMRI();
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  unsigned Size = Ty.getSizeInBits();
  assert((Size == 64 || Size == 128) &&
         "Destination must be 64 bits or 128 bits?");
  unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
  auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg();
  assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
  auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr});
  Load.cloneMemRefs(I);
  constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
  Register SelectedLoadDst = Load->getOperand(0).getReg();
  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
    auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {})
                   .addReg(SelectedLoadDst, 0, SubReg + Idx);
    // Emit the subreg copies and immediately select them.
    // FIXME: We should refactor our copy code into an emitCopy helper and
    // clean up uses of this pattern elsewhere in the selector.
    selectCopy(*Vec, TII, MRI, TRI, RBI);
  }
  return true;
}

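/// Select G_INTRINSIC_W_SIDE_EFFECTS instructions (exclusive loads, traps,
/// and NEON structured loads/stores).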
bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  // Find the intrinsic ID.
  unsigned IntrinID = I.getIntrinsicID();

  const LLT S8 = LLT::scalar(8);
  const LLT S16 = LLT::scalar(16);
  const LLT S32 = LLT::scalar(32);
  const LLT S64 = LLT::scalar(64);
  const LLT P0 = LLT::pointer(0, 64);
  // Select the instruction.
  switch (IntrinID) {
  default:
    return false;
  case Intrinsic::aarch64_ldxp:
  case Intrinsic::aarch64_ldaxp: {
    auto NewI = MIB.buildInstr(
        IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
        {I.getOperand(0).getReg(), I.getOperand(1).getReg()},
        {I.getOperand(3)});
    NewI.cloneMemRefs(I);
    constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
    break;
  }
  case Intrinsic::trap:
    MIB.buildInstr(AArch64::BRK, {}, {}).addImm(1);
    break;
  case Intrinsic::debugtrap:
    MIB.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
    break;
  case Intrinsic::ubsantrap:
    MIB.buildInstr(AArch64::BRK, {}, {})
        .addImm(I.getOperand(1).getImm() | ('U' << 8));
    break;
  case Intrinsic::aarch64_neon_ld2: {
    LLT Ty = MRI.getType(I.getOperand(0).getReg());
    unsigned Opc = 0;
    if (Ty == LLT::fixed_vector(8, S8))
      Opc = AArch64::LD2Twov8b;
    else if (Ty == LLT::fixed_vector(16, S8))
      Opc = AArch64::LD2Twov16b;
    else if (Ty == LLT::fixed_vector(4, S16))
      Opc = AArch64::LD2Twov4h;
    else if (Ty == LLT::fixed_vector(8, S16))
      Opc = AArch64::LD2Twov8h;
    else if (Ty == LLT::fixed_vector(2, S32))
      Opc = AArch64::LD2Twov2s;
    else if (Ty == LLT::fixed_vector(4, S32))
      Opc = AArch64::LD2Twov4s;
    else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
      Opc = AArch64::LD2Twov2d;
    else if (Ty == S64 || Ty == P0)
      Opc = AArch64::LD1Twov1d;
    else
      llvm_unreachable("Unexpected type for ld2!");
    selectVectorLoadIntrinsic(Opc, 2, I);
    break;
  }
  case Intrinsic::aarch64_neon_ld4: {
    LLT Ty = MRI.getType(I.getOperand(0).getReg());
    unsigned Opc = 0;
    if (Ty == LLT::fixed_vector(8, S8))
      Opc = AArch64::LD4Fourv8b;
    else if (Ty == LLT::fixed_vector(16, S8))
      Opc = AArch64::LD4Fourv16b;
    else if (Ty == LLT::fixed_vector(4, S16))
      Opc = AArch64::LD4Fourv4h;
    else if (Ty == LLT::fixed_vector(8, S16))
      Opc = AArch64::LD4Fourv8h;
    else if (Ty == LLT::fixed_vector(2, S32))
      Opc = AArch64::LD4Fourv2s;
    else if (Ty == LLT::fixed_vector(4, S32))
      Opc = AArch64::LD4Fourv4s;
    else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
      Opc = AArch64::LD4Fourv2d;
    else if (Ty == S64 || Ty == P0)
      Opc = AArch64::LD1Fourv1d;
    else
      llvm_unreachable("Unexpected type for ld4!");
    selectVectorLoadIntrinsic(Opc, 4, I);
    break;
  }
  case Intrinsic::aarch64_neon_st2: {
    Register Src1 = I.getOperand(1).getReg();
    Register Src2 = I.getOperand(2).getReg();
    Register Ptr = I.getOperand(3).getReg();
    LLT Ty = MRI.getType(Src1);
    unsigned Opc;
    if (Ty == LLT::fixed_vector(8, S8))
      Opc = AArch64::ST2Twov8b;
    else if (Ty == LLT::fixed_vector(16, S8))
      Opc = AArch64::ST2Twov16b;
    else if (Ty == LLT::fixed_vector(4, S16))
      Opc = AArch64::ST2Twov4h;
    else if (Ty == LLT::fixed_vector(8, S16))
      Opc = AArch64::ST2Twov8h;
    else if (Ty == LLT::fixed_vector(2, S32))
      Opc = AArch64::ST2Twov2s;
    else if (Ty == LLT::fixed_vector(4, S32))
      Opc = AArch64::ST2Twov4s;
    else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
      Opc = AArch64::ST2Twov2d;
    else if (Ty == S64 || Ty == P0)
      Opc = AArch64::ST1Twov1d;
    else
      llvm_unreachable("Unexpected type for st2!");
    SmallVector<Register, 2> Regs = {Src1, Src2};
    Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
                                               : createDTuple(Regs, MIB);
    auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
    Store.cloneMemRefs(I);
    constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
    break;
  }
  }

  I.eraseFromParent();
  return true;
}

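/// Select side-effect-free G_INTRINSIC instructions.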
bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
                                                 MachineRegisterInfo &MRI) {
  unsigned IntrinID = I.getIntrinsicID();

  switch (IntrinID) {
  default:
    break;
  case Intrinsic::aarch64_crypto_sha1h: {
    Register DstReg = I.getOperand(0).getReg();
    Register SrcReg = I.getOperand(2).getReg();

    // FIXME: Should this be an assert?
    if (MRI.getType(DstReg).getSizeInBits() != 32 ||
        MRI.getType(SrcReg).getSizeInBits() != 32)
      return false;

    // The operation has to happen on FPRs. Set up some new FPR registers for
    // the source and destination if they are on GPRs.
    if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
      SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
      MIB.buildCopy({SrcReg}, {I.getOperand(2)});

      // Make sure the copy ends up getting constrained properly.
      RBI.constrainGenericRegister(I.getOperand(2).getReg(),
                                   AArch64::GPR32RegClass, MRI);
    }

    if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
      DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);

    // Actually insert the instruction.
    auto SHA1Inst = MIB.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
    constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);

    // Did we create a new register for the destination?
    if (DstReg != I.getOperand(0).getReg()) {
      // Yep. Copy the result of the instruction back into the original
      // destination.
      MIB.buildCopy({I.getOperand(0)}, {DstReg});
      RBI.constrainGenericRegister(I.getOperand(0).getReg(),
                                   AArch64::GPR32RegClass, MRI);
    }

    I.eraseFromParent();
    return true;
  }
  case Intrinsic::frameaddress:
  case Intrinsic::returnaddress: {
    MachineFunction &MF = *I.getParent()->getParent();
    MachineFrameInfo &MFI = MF.getFrameInfo();

    unsigned Depth = I.getOperand(2).getImm();
    Register DstReg = I.getOperand(0).getReg();
    RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);

    if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
      if (!MFReturnAddr) {
        // Insert the copy from LR/X30 into the entry block, before it can be
        // clobbered by anything.
        MFI.setReturnAddressIsTaken(true);
        MFReturnAddr = getFunctionLiveInPhysReg(MF, TII, AArch64::LR,
                                                AArch64::GPR64RegClass);
      }

      if (STI.hasPAuth()) {
        MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
      } else {
        MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
        MIB.buildInstr(AArch64::XPACLRI);
        MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
      }

      I.eraseFromParent();
      return true;
    }

    MFI.setFrameAddressIsTaken(true);
    Register FrameAddr(AArch64::FP);
    while (Depth--) {
      Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
      auto Ldr =
          MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
      constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);
      FrameAddr = NextFrame;
    }

    if (IntrinID == Intrinsic::frameaddress)
      MIB.buildCopy({DstReg}, {FrameAddr});
    else {
      MFI.setReturnAddressIsTaken(true);

      if (STI.hasPAuth()) {
        Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
        MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
        MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
      } else {
        MIB.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr})
            .addImm(1);
        MIB.buildInstr(AArch64::XPACLRI);
        MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
      }
    }

    I.eraseFromParent();
    return true;
  }
  case Intrinsic::swift_async_context_addr:
    auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()},
                              {Register(AArch64::FP)})
                   .addImm(8)
                   .addImm(0);
    constrainSelectedInstRegOperands(*Sub, TII, TRI, RBI);

    MF->getFrameInfo().setFrameAddressIsTaken(true);
    MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
    I.eraseFromParent();
    return true;
  }
  return false;
}

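/// The next four renderers encode the immr/imms operands used when a
/// shift-by-immediate is selected as a bitfield move (UBFM/SBFM): for a
/// 32-bit left shift by N, immr is (32 - N) & 0x1f ("ShiftA") and imms is
/// 31 - N ("ShiftB"); the 64-bit variants use 64 and 63 respectively.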
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == None || *MaybeImmed > 31)
    return None;
  uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == None || *MaybeImmed > 31)
    return None;
  uint64_t Enc = 31 - *MaybeImmed;
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == None || *MaybeImmed > 63)
    return None;
  uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == None || *MaybeImmed > 63)
    return None;
  uint64_t Enc = 63 - *MaybeImmed;
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}

/// Helper to select an immediate value that can be represented as a 12-bit
/// value shifted left by either 0 or 12. If it is possible to do so, return
/// the immediate and shift value. If not, return None.
///
/// Used by selectArithImmed and selectNegArithImmed.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::select12BitValueWithLeftShift(
    uint64_t Immed) const {
  unsigned ShiftAmt;
  if (Immed >> 12 == 0) {
    ShiftAmt = 0;
  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
    ShiftAmt = 12;
    Immed = Immed >> 12;
  } else
    return None;

  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
  }};
}

/// SelectArithImmed - Select an immediate value that can be represented as
/// a 12-bit value shifted left by either 0 or 12. If so, return true with
/// Val set to the 12-bit value and Shift set to the shifter operand.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it's interested in; however, we
  // still need to check whether the operand is actually an immediate here
  // because the ComplexPattern opcode list is only used in root-level opcode
  // matching.
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == None)
    return None;
  return select12BitValueWithLeftShift(*MaybeImmed);
}

/// SelectNegArithImmed - As above, but negates the value before trying to
/// select it.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
  // We need a register here, because we need to know if we have a 64 or 32
  // bit immediate.
  if (!Root.isReg())
    return None;
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == None)
    return None;
  uint64_t Immed = *MaybeImmed;

  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
  // have the opposite effect on the C flag, so this pattern mustn't match
  // under those circumstances.
  if (Immed == 0)
    return None;

  // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
  // the root.
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
  if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
    Immed = ~((uint32_t)Immed) + 1;
  else
    Immed = ~Immed + 1ULL;

  if (Immed & 0xFFFFFFFFFF000000ULL)
    return None;

  Immed &= 0xFFFFFFULL;
  return select12BitValueWithLeftShift(Immed);
}

/// Return true if it is worth folding MI into an extended register. That is,
/// if it's safe to pull it into the addressing mode of a load or store as a
/// shift.
bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
    MachineInstr &MI, const MachineRegisterInfo &MRI) const {
  // Always fold if there is one use, or if we're optimizing for size.
  Register DefReg = MI.getOperand(0).getReg();
  if (MRI.hasOneNonDBGUse(DefReg) ||
      MI.getParent()->getParent()->getFunction().hasOptSize())
    return true;

  // It's better to avoid folding and recomputing shifts when we don't have a
  // fastpath.
  if (!STI.hasLSLFast())
    return false;

  // We have a fastpath, so folding a shift in and potentially computing it
  // many times may be beneficial. Check if this is only used in memory ops.
  // If it is, then we should fold.
  return all_of(MRI.use_nodbg_instructions(DefReg),
                [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
}

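/// Return true if Type is a sign-extending extend kind (SXTB/SXTH/SXTW).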
static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) {
  switch (Type) {
  case AArch64_AM::SXTB:
  case AArch64_AM::SXTH:
  case AArch64_AM::SXTW:
    return true;
  default:
    return false;
  }
}

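/// Match an offset computed as a (possibly extended) register shifted or
/// multiplied by a constant that can fold into a scaled register-offset
/// addressing mode; SizeInBytes fixes the one legal shift amount.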
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectExtendedSHL(
    MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
    unsigned SizeInBytes, bool WantsExt) const {
  assert(Base.isReg() && "Expected base to be a register operand");
  assert(Offset.isReg() && "Expected offset to be a register operand");

  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
  MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
  if (!OffsetInst)
    return None;

  unsigned OffsetOpc = OffsetInst->getOpcode();
  bool LookedThroughZExt = false;
  if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
    // Try to look through a ZEXT.
    if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
      return None;

    OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());
    OffsetOpc = OffsetInst->getOpcode();
    LookedThroughZExt = true;

    if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
      return None;
  }
  // Make sure that the memory op is a valid size.
  int64_t LegalShiftVal = Log2_32(SizeInBytes);
  if (LegalShiftVal == 0)
    return None;
  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
    return None;

  // Now, try to find the specific G_CONSTANT. Start by assuming that the
  // register we will offset is the LHS, and the register containing the
  // constant is the RHS.
  Register OffsetReg = OffsetInst->getOperand(1).getReg();
  Register ConstantReg = OffsetInst->getOperand(2).getReg();
  auto ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
  if (!ValAndVReg) {
    // We didn't get a constant on the RHS. If the opcode is a shift, then
    // we're done.
    if (OffsetOpc == TargetOpcode::G_SHL)
      return None;

    // If we have a G_MUL, we can use either register. Try looking at the RHS.
    std::swap(OffsetReg, ConstantReg);
    ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
    if (!ValAndVReg)
      return None;
  }

  // The value must fit into 3 bits, and must be positive. Make sure that is
  // true.
  int64_t ImmVal = ValAndVReg->Value.getSExtValue();

  // Since we're going to pull this into a shift, the constant value must be
  // a power of 2. If we got a multiply, then we need to check this.
  if (OffsetOpc == TargetOpcode::G_MUL) {
    if (!isPowerOf2_32(ImmVal))
      return None;

    // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
    ImmVal = Log2_32(ImmVal);
  }

  if ((ImmVal & 0x7) != ImmVal)
    return None;

  // We are only allowed to shift by LegalShiftVal. This shift value is built
  // into the instruction, so we can't just use whatever we want.
  if (ImmVal != LegalShiftVal)
    return None;

  unsigned SignExtend = 0;
  if (WantsExt) {
    // Check if the offset is defined by an extend, unless we looked through a
    // G_ZEXT earlier.
    if (!LookedThroughZExt) {
      MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
      auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
      if (Ext == AArch64_AM::InvalidShiftExtend)
        return None;

      SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
      // We only support SXTW for signed extension here.
      if (SignExtend && Ext != AArch64_AM::SXTW)
        return None;
      OffsetReg = ExtInst->getOperand(1).getReg();
    }

    // Need a 32-bit wide register here.
    MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
    OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
  }

  // We can use the LHS of the GEP as the base, and the LHS of the shift as an
  // offset. Signify that we are shifting by setting the shift flag to 1.
  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
           [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
           [=](MachineInstrBuilder &MIB) {
             // Need to add both immediates here to make sure that they are
             // both added to the instruction.
             MIB.addImm(SignExtend);
             MIB.addImm(1);
           }}};
}

/// This is used for computing addresses like this:
///
/// ldr x1, [x2, x3, lsl #3]
///
/// Where x2 is the base register, and x3 is an offset register. The shift-left
/// is a constant value specific to this load instruction. That is, we'll never
/// see anything other than a 3 here (which corresponds to the size of the
/// element being loaded).
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
    MachineOperand &Root, unsigned SizeInBytes) const {
  if (!Root.isReg())
    return None;
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  // We want to find something like this:
  //
  // val = G_CONSTANT LegalShiftVal
  // shift = G_SHL off_reg val
  // ptr = G_PTR_ADD base_reg shift
  // x = G_LOAD ptr
  //
  // And fold it into this addressing mode:
  //
  // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]

  // Check if we can find the G_PTR_ADD.
  MachineInstr *PtrAdd =
      getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
    return None;

  // Now, try to match an opcode which will match our specific offset.
  // We want a G_SHL or a G_MUL.
  MachineInstr *OffsetInst =
      getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
  return selectExtendedSHL(Root, PtrAdd->getOperand(1),
                           OffsetInst->getOperand(0), SizeInBytes,
                           /*WantsExt=*/false);
}

/// This is used for computing addresses like this:
///
/// ldr x1, [x2, x3]
///
/// Where x2 is the base register, and x3 is an offset register.
///
/// When it is possible (or profitable) to fold a G_PTR_ADD into the address
/// calculation, this will do so. Otherwise, it will return None.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeRegisterOffset(
    MachineOperand &Root) const {
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  // We need a GEP.
  MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
  if (!Gep || Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
    return None;

  // If this is used more than once, let's not bother folding.
  // TODO: Check if they are memory ops. If they are, then we can still fold
  // without having to recompute anything.
  if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
    return None;

  // Base is the GEP's LHS, offset is its RHS.
  return {{[=](MachineInstrBuilder &MIB) {
             MIB.addUse(Gep->getOperand(1).getReg());
           },
           [=](MachineInstrBuilder &MIB) {
             MIB.addUse(Gep->getOperand(2).getReg());
           },
           [=](MachineInstrBuilder &MIB) {
             // Need to add both immediates here to make sure that they are
             // both added to the instruction.
             MIB.addImm(0);
             MIB.addImm(0);
           }}};
}

/// This is intended to be equivalent to selectAddrModeXRO in
/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
                                              unsigned SizeInBytes) const {
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
  if (!Root.isReg())
    return None;
  MachineInstr *PtrAdd =
      getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
  if (!PtrAdd)
    return None;

  // Check for immediates which cannot be encoded in the [base + imm]
  // addressing mode, and can't be encoded in an add/sub. If this happens,
  // we'll end up with code like:
  //
  // mov x0, wide
  // add x1 base, x0
  // ldr x2, [x1, x0]
  //
  // In this situation, we can use the [base, xreg] addressing mode to save an
  // add/sub:
  //
  // mov x0, wide
  // ldr x2, [base, x0]
  auto ValAndVReg =
      getIConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
  if (ValAndVReg) {
    unsigned Scale = Log2_32(SizeInBytes);
    int64_t ImmOff = ValAndVReg->Value.getSExtValue();

    // Skip immediates that can be selected in the load/store addressing
    // mode.
    if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale))
      return None;

    // Helper lambda to decide whether or not it is preferable to emit an add.
    auto isPreferredADD = [](int64_t ImmOff) {
      // Constants in [0x0, 0xfff] can be encoded in an add.
      if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
        return true;

      // Can it be encoded in an add lsl #12?
      if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
        return false;

      // It can be encoded in an add lsl #12, but we may not want to. If it is
      // possible to select this as a single movz, then prefer that. A single
      // movz is faster than an add with a shift.
      return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
             (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
    };

    // If the immediate can be encoded in a single add/sub, then bail out.
    if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
      return None;
  }

  // Try to fold shifts into the addressing mode.
  auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
  if (AddrModeFns)
    return AddrModeFns;

  // If that doesn't work, see if it's possible to fold in registers from
  // a GEP.
  return selectAddrModeRegisterOffset(Root);
}

/// This is used for computing addresses like this:
///
/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
///
/// Where we have a 64-bit base register, a 32-bit offset register, and an
/// extend (which may or may not be signed).
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
                                              unsigned SizeInBytes) const {
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  MachineInstr *PtrAdd =
      getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
    return None;

  MachineOperand &LHS = PtrAdd->getOperand(1);
  MachineOperand &RHS = PtrAdd->getOperand(2);
  MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);

  // The first case is the same as selectAddrModeXRO, except we need an
  // extend. In this case, we try to find a shift and extend, and fold them
  // into the addressing mode.
  //
  // E.g.
  //
  // off_reg = G_Z/S/ANYEXT ext_reg
  // val = G_CONSTANT LegalShiftVal
  // shift = G_SHL off_reg val
  // ptr = G_PTR_ADD base_reg shift
  // x = G_LOAD ptr
  //
  // In this case we can get a load like this:
  //
  // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
  auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
                                       SizeInBytes, /*WantsExt=*/true);
  if (ExtendedShl)
    return ExtendedShl;

  // There was no shift. We can try to fold a G_Z/S/ANYEXT in alone though.
  //
  // e.g.
  // ldr something, [base_reg, ext_reg, sxtw]
  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
    return None;

  // Check if this is an extend. We'll get an extend type if it is.
  AArch64_AM::ShiftExtendType Ext =
      getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
  if (Ext == AArch64_AM::InvalidShiftExtend)
    return None;

  // Need a 32-bit wide register.
  MachineIRBuilder MIB(*PtrAdd);
  Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
                                       AArch64::GPR32RegClass, MIB);
  unsigned SignExtend = Ext == AArch64_AM::SXTW;

  // Base is LHS, offset is ExtReg.
  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
           [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
           [=](MachineInstrBuilder &MIB) {
             MIB.addImm(SignExtend);
             MIB.addImm(0);
           }}};
}

/// Select a "register plus unscaled signed 9-bit immediate" address. This
/// should only match when there is an offset that is not valid for a scaled
/// immediate addressing mode. The "Size" argument is the size in bytes of the
/// memory reference, which is needed here to know what is valid for a scaled
/// immediate.
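///
/// For example (illustrative): a 4-byte load at byte offset -3 cannot use the
/// scaled form, but -3 fits the signed 9-bit range [-256, 255], so it can be
/// selected as "ldur w0, [x1, #-3]".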
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
                                                   unsigned Size) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  if (!Root.isReg())
    return None;

  if (!isBaseWithConstantOffset(Root, MRI))
    return None;

  MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
  if (!RootDef)
    return None;

  MachineOperand &OffImm = RootDef->getOperand(2);
  if (!OffImm.isReg())
    return None;
  MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
  if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
    return None;
  MachineOperand &RHSOp1 = RHS->getOperand(1);
  if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
    return None;
  int64_t RHSC = RHSOp1.getCImm()->getSExtValue();

  // If the offset is valid as a scaled immediate, don't match here.
  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
      RHSC < (0x1000 << Log2_32(Size)))
    return None;
  if (RHSC >= -256 && RHSC < 256) {
    MachineOperand &Base = RootDef->getOperand(1);
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
    }};
  }
  return None;
}

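/// Try to fold a G_ADD_LOW fed by an ADRP into the addressing mode, so that
/// the page offset of the global is rendered directly on the load/store.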
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::tryFoldAddLowIntoImm(
    MachineInstr &RootDef, unsigned Size, MachineRegisterInfo &MRI) const {
  if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
    return None;
  MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
  if (Adrp.getOpcode() != AArch64::ADRP)
    return None;

  // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
  auto Offset = Adrp.getOperand(1).getOffset();
  if (Offset % Size != 0)
    return None;

  auto GV = Adrp.getOperand(1).getGlobal();
  if (GV->isThreadLocal())
    return None;

  auto &MF = *RootDef.getParent()->getParent();
  if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
    return None;

  unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
  MachineIRBuilder MIRBuilder(RootDef);
  Register AdrpReg = Adrp.getOperand(0).getReg();
  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
           [=](MachineInstrBuilder &MIB) {
             MIB.addGlobalAddress(GV, Offset,
                                  OpFlags | AArch64II::MO_PAGEOFF |
                                      AArch64II::MO_NC);
           }}};
}

/// Select a "register plus scaled unsigned 12-bit immediate" address. The
/// "Size" argument is the size in bytes of the memory reference, which
/// determines the scale.
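///
/// For example (illustrative): an 8-byte load at byte offset 16 renders the
/// base register plus the scaled immediate 2 (16 >> 3), i.e.
/// "ldr x0, [x1, #16]".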
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
                                                  unsigned Size) const {
  MachineFunction &MF = *Root.getParent()->getParent()->getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  if (!Root.isReg())
    return None;

  MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
  if (!RootDef)
    return None;

  if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
    }};
  }

  CodeModel::Model CM = MF.getTarget().getCodeModel();
  // Check if we can fold in the ADD of a small code model ADRP + ADD address.
  if (CM == CodeModel::Small) {
    auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
    if (OpFns)
      return OpFns;
  }

  if (isBaseWithConstantOffset(Root, MRI)) {
    MachineOperand &LHS = RootDef->getOperand(1);
    MachineOperand &RHS = RootDef->getOperand(2);
    MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
    MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
    if (LHSDef && RHSDef) {
      int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
      unsigned Scale = Log2_32(Size);
      if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
        if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
          return {{
              [=](MachineInstrBuilder &MIB) {
                MIB.add(LHSDef->getOperand(1));
              },
              [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
          }};

        return {{
            [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
            [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
        }};
      }
    }
  }

  // Before falling back to our general case, check if the unscaled
  // instructions can handle this. If so, that's preferable.
  if (selectAddrModeUnscaled(Root, Size).hasValue())
    return None;

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
  }};
}

/// Given a shift instruction, return the correct shift type for that
/// instruction.
static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return AArch64_AM::InvalidShiftExtend;
  case TargetOpcode::G_SHL:
    return AArch64_AM::LSL;
  case TargetOpcode::G_LSHR:
    return AArch64_AM::LSR;
  case TargetOpcode::G_ASHR:
    return AArch64_AM::ASR;
  case TargetOpcode::G_ROTR:
    return AArch64_AM::ROR;
  }
}

/// Select a "shifted register" operand. If the value is not shifted, set the
/// shift operand to a default value of "lsl 0".
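///
/// For example (illustrative): a G_SHL of a register by a constant 3, used by
/// an add, can fold to "add x0, x1, x2, lsl #3" instead of a separate shift.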
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
                                                  bool AllowROR) const {
  if (!Root.isReg())
    return None;
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  // Check if the operand is defined by an instruction which corresponds to
  // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
  MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
  if (!ShiftInst)
    return None;
  AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
  if (ShType == AArch64_AM::InvalidShiftExtend)
    return None;
  if (ShType == AArch64_AM::ROR && !AllowROR)
    return None;
  if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
    return None;

  // Need an immediate on the RHS.
  MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
  auto Immed = getImmedFromMO(ShiftRHS);
  if (!Immed)
    return None;

  // We have something that we can fold. Fold in the shift's LHS and RHS into
  // the instruction.
  MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
  Register ShiftReg = ShiftLHS.getReg();

  unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
  unsigned Val = *Immed & (NumBits - 1);
  unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);

  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
}

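/// Return the extend type (e.g. SXTB or UXTW) implied by \p MI, or
/// InvalidShiftExtend if \p MI is not an extend-like instruction. When
/// \p IsLoadStore is true, extends which are not usable in a load/store
/// addressing mode are also reported as InvalidShiftExtend.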
AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
    MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
  unsigned Opc = MI.getOpcode();

  // Handle explicit extend instructions first.
  if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
    unsigned Size;
    if (Opc == TargetOpcode::G_SEXT)
      Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    else
      Size = MI.getOperand(2).getImm();
    assert(Size != 64 && "Extend from 64 bits?");
    switch (Size) {
    case 8:
      return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB;
    case 16:
      return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH;
    case 32:
      return AArch64_AM::SXTW;
    default:
      return AArch64_AM::InvalidShiftExtend;
    }
  }

  if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
    unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    assert(Size != 64 && "Extend from 64 bits?");
    switch (Size) {
    case 8:
      return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB;
    case 16:
      return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH;
    case 32:
      return AArch64_AM::UXTW;
    default:
      return AArch64_AM::InvalidShiftExtend;
    }
  }

  // Don't have an explicit extend. Try to handle a G_AND with a constant mask
  // on the RHS.
  if (Opc != TargetOpcode::G_AND)
    return AArch64_AM::InvalidShiftExtend;

  Optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
  if (!MaybeAndMask)
    return AArch64_AM::InvalidShiftExtend;
  uint64_t AndMask = *MaybeAndMask;
  switch (AndMask) {
  default:
    return AArch64_AM::InvalidShiftExtend;
  case 0xFF:
    return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
  case 0xFFFF:
    return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
  case 0xFFFFFFFF:
    return AArch64_AM::UXTW;
  }
}

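/// Return \p Reg if its size already matches the size of \p RC; otherwise,
/// emit (and immediately select) a COPY into \p RC and return the copy's
/// destination register.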
Register AArch64InstructionSelector::moveScalarRegClass(
    Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
  MachineRegisterInfo &MRI = *MIB.getMRI();
  auto Ty = MRI.getType(Reg);
  assert(!Ty.isVector() && "Expected scalars only!");
  if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
    return Reg;

  // Create a copy and immediately select it.
  // FIXME: We should have an emitCopy function?
  auto Copy = MIB.buildCopy({&RC}, {Reg});
  selectCopy(*Copy, TII, MRI, TRI, RBI);
  return Copy.getReg(0);
}

/// Select an "extended register" operand. This operand folds in an extend
/// followed by an optional left shift.
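///
/// For example (illustrative): a sign-extended 32-bit offset shifted left by
/// two can fold into "add x0, x1, w2, sxtw #2".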
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectArithExtendedRegister(
    MachineOperand &Root) const {
  if (!Root.isReg())
    return None;
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  uint64_t ShiftVal = 0;
  Register ExtReg;
  AArch64_AM::ShiftExtendType Ext;
  MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
  if (!RootDef)
    return None;

  if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI))
    return None;

  // Check if we can fold a shift and an extend.
  if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
    // Look for a constant on the RHS of the shift.
    MachineOperand &RHS = RootDef->getOperand(2);
    Optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
    if (!MaybeShiftVal)
      return None;
    ShiftVal = *MaybeShiftVal;
    if (ShiftVal > 4)
      return None;
    // Look for a valid extend instruction on the LHS of the shift.
    MachineOperand &LHS = RootDef->getOperand(1);
    MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
    if (!ExtDef)
      return None;
    Ext = getExtendTypeForInst(*ExtDef, MRI);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return None;
    ExtReg = ExtDef->getOperand(1).getReg();
  } else {
    // Didn't get a shift. Try just folding an extend.
    Ext = getExtendTypeForInst(*RootDef, MRI);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return None;
    ExtReg = RootDef->getOperand(1).getReg();

    // If we have a 32-bit instruction which zeroes out the high half of a
    // register, we get an implicit zero extend for free. Check if we have one.
    // FIXME: We actually emit the extend right now even though we don't have
    // to.
    if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
      MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
      if (ExtInst && isDef32(*ExtInst))
        return None;
    }
  }

  // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
  // copy.
  MachineIRBuilder MIB(*RootDef);
  ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);

  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
           [=](MachineInstrBuilder &MIB) {
             MIB.addImm(getArithExtendImm(Ext, ShiftVal));
           }}};
}

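/// Render a G_CONSTANT as an immediate operand, using the constant's
/// sign-extended value.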
void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI,
                                                int OpIdx) const {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  Optional<int64_t> CstVal =
      getIConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
  assert(CstVal && "Expected constant value");
  MIB.addImm(CstVal.getValue());
}

void AArch64InstructionSelector::renderLogicalImm32(
    MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
  assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
  MIB.addImm(Enc);
}

void AArch64InstructionSelector::renderLogicalImm64(
    MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
  assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
  MIB.addImm(Enc);
}

void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB,
                                               const MachineInstr &MI,
                                               int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
         "Expected G_FCONSTANT");
  MIB.addImm(
      AArch64_AM::getFP16Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
}

void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB,
                                               const MachineInstr &MI,
                                               int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
         "Expected G_FCONSTANT");
  MIB.addImm(
      AArch64_AM::getFP32Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
}

void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
                                               const MachineInstr &MI,
                                               int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
         "Expected G_FCONSTANT");
  MIB.addImm(
      AArch64_AM::getFP64Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
}

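/// Return true if \p MI is a load or store whose single memory operand
/// accesses exactly \p NumBytes bytes.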
bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
    const MachineInstr &MI, unsigned NumBytes) const {
  if (!MI.mayLoadOrStore())
    return false;
  assert(MI.hasOneMemOperand() &&
         "Expected load/store to have only one mem op!");
  return (*MI.memoperands_begin())->getSize() == NumBytes;
}

bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
    return false;

  // Only return true if we know the operation will zero-out the high half of
  // the 64-bit register. Truncates can be subregister copies, which don't
  // zero out the high bits. Copies and other copy-like instructions can be
  // fed by truncates, or could be lowered as subregister copies.
  switch (MI.getOpcode()) {
  default:
    return true;
  case TargetOpcode::COPY:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_TRUNC:
  case TargetOpcode::G_PHI:
    return false;
  }
}

// Perform fixups on the given PHI instruction's operands to force them all
// to be the same as the destination regbank.
static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
                            const AArch64RegisterBankInfo &RBI) {
  assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
  Register DstReg = MI.getOperand(0).getReg();
  const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
  assert(DstRB && "Expected PHI dst to have regbank assigned");
  MachineIRBuilder MIB(MI);

  // Go through each operand and ensure it has the same regbank.
  for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
    MachineOperand &MO = MI.getOperand(OpIdx);
    if (!MO.isReg())
      continue;
    Register OpReg = MO.getReg();
    const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
    if (RB != DstRB) {
      // Insert a cross-bank copy.
      auto *OpDef = MRI.getVRegDef(OpReg);
      const LLT &Ty = MRI.getType(OpReg);
      MachineBasicBlock &OpDefBB = *OpDef->getParent();

      // Any instruction we insert must appear after all PHIs in the block
      // for the block to be valid MIR.
      MachineBasicBlock::iterator InsertPt = std::next(OpDef->getIterator());
      if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
        InsertPt = OpDefBB.getFirstNonPHI();
      MIB.setInsertPt(OpDefBB, InsertPt);
      auto Copy = MIB.buildCopy(Ty, OpReg);
      MRI.setRegBank(Copy.getReg(0), *DstRB);
      MO.setReg(Copy.getReg(0));
    }
  }
}

void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
  // We're looking for PHIs, build a list so we don't invalidate iterators.
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SmallVector<MachineInstr *, 32> Phis;
  for (auto &BB : MF) {
    for (auto &MI : BB) {
      if (MI.getOpcode() == TargetOpcode::G_PHI)
        Phis.emplace_back(&MI);
    }
  }

  for (auto *MI : Phis) {
    // We need to do some work here if the operand types are < 16 bit and they
    // are split across fpr/gpr banks. Since all types <32b on gpr
    // end up being assigned gpr32 regclasses, we can end up with PHIs here
    // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
    // be selecting heterogeneous regbanks for operands if possible, but we
    // still need to be able to deal with it here.
    //
    // To fix this, if we have a gpr-bank operand < 32b in size and at least
    // one other operand is on the fpr bank, then we add cross-bank copies
    // to homogenize the operand banks. For simplicity the bank that we choose
    // to settle on is whatever bank the def operand has. For example:
    //
    // %endbb:
    //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
    // =>
    // %bb2:
    //   ...
    //   %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
    //   ...
    // %endbb:
    //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
    bool HasGPROp = false, HasFPROp = false;
    for (unsigned OpIdx = 1; OpIdx < MI->getNumOperands(); ++OpIdx) {
      const auto &MO = MI->getOperand(OpIdx);
      if (!MO.isReg())
        continue;
      const LLT &Ty = MRI.getType(MO.getReg());
      if (!Ty.isValid() || !Ty.isScalar())
        break;
      if (Ty.getSizeInBits() >= 32)
        break;
      const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
      // If for some reason we don't have a regbank yet, don't try anything.
      if (!RB)
        break;

      if (RB->getID() == AArch64::GPRRegBankID)
        HasGPROp = true;
      else
        HasFPROp = true;
    }
    // We have heterogeneous regbanks, need to fixup.
    if (HasGPROp && HasFPROp)
      fixupPHIOpBanks(*MI, MRI, RBI);
  }
}

namespace llvm {
InstructionSelector *
createAArch64InstructionSelector(const AArch64TargetMachine &TM,
                                 AArch64Subtarget &Subtarget,
                                 AArch64RegisterBankInfo &RBI) {
  return new AArch64InstructionSelector(TM, Subtarget, RBI);
}
} // namespace llvm