1 //===- X86InstructionSelector.cpp -----------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file implements the targeting of the InstructionSelector class for
10 /// X86.
11 /// \todo This should be generated by TableGen.
12 //===----------------------------------------------------------------------===//
13
14 #include "MCTargetDesc/X86BaseInfo.h"
15 #include "X86InstrBuilder.h"
16 #include "X86InstrInfo.h"
17 #include "X86RegisterBankInfo.h"
18 #include "X86RegisterInfo.h"
19 #include "X86Subtarget.h"
20 #include "X86TargetMachine.h"
21 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
22 #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
23 #include "llvm/CodeGen/GlobalISel/RegisterBank.h"
24 #include "llvm/CodeGen/GlobalISel/Utils.h"
25 #include "llvm/CodeGen/MachineBasicBlock.h"
26 #include "llvm/CodeGen/MachineConstantPool.h"
27 #include "llvm/CodeGen/MachineFunction.h"
28 #include "llvm/CodeGen/MachineInstr.h"
29 #include "llvm/CodeGen/MachineInstrBuilder.h"
30 #include "llvm/CodeGen/MachineMemOperand.h"
31 #include "llvm/CodeGen/MachineOperand.h"
32 #include "llvm/CodeGen/MachineRegisterInfo.h"
33 #include "llvm/CodeGen/TargetOpcodes.h"
34 #include "llvm/CodeGen/TargetRegisterInfo.h"
35 #include "llvm/IR/DataLayout.h"
36 #include "llvm/IR/InstrTypes.h"
37 #include "llvm/IR/IntrinsicsX86.h"
38 #include "llvm/Support/AtomicOrdering.h"
39 #include "llvm/Support/CodeGen.h"
40 #include "llvm/Support/Debug.h"
41 #include "llvm/Support/ErrorHandling.h"
42 #include "llvm/Support/LowLevelTypeImpl.h"
43 #include "llvm/Support/MathExtras.h"
44 #include "llvm/Support/raw_ostream.h"
45 #include <cassert>
46 #include <cstdint>
47 #include <tuple>
48
49 #define DEBUG_TYPE "X86-isel"
50
51 using namespace llvm;
52
53 namespace {
54
55 #define GET_GLOBALISEL_PREDICATE_BITSET
56 #include "X86GenGlobalISel.inc"
57 #undef GET_GLOBALISEL_PREDICATE_BITSET
58
/// Instruction selector for the X86 target. Dispatches first to the
/// TableGen-erated selectImpl(), then falls back to the hand-written
/// per-opcode select* helpers below for patterns not yet covered by TableGen.
class X86InstructionSelector : public InstructionSelector {
public:
  X86InstructionSelector(const X86TargetMachine &TM, const X86Subtarget &STI,
                         const X86RegisterBankInfo &RBI);

  bool select(MachineInstr &I) override;
  static const char *getName() { return DEBUG_TYPE; }

private:
  /// tblgen-erated 'select' implementation, used as the initial selector for
  /// the patterns that don't require complex C++.
  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;

  // TODO: remove after supported by Tablegen-erated instruction selection.
  /// Map a G_LOAD/G_STORE of type \p Ty on bank \p RB to a concrete X86
  /// move opcode, taking \p Alignment into account for vector moves.
  unsigned getLoadStoreOp(const LLT &Ty, const RegisterBank &RB, unsigned Opc,
                          uint64_t Alignment) const;

  // Hand-written selection routines, one per generic opcode (or small
  // family of opcodes). Each returns true on successful selection.
  bool selectLoadStoreOp(MachineInstr &I, MachineRegisterInfo &MRI,
                         MachineFunction &MF) const;
  bool selectFrameIndexOrGep(MachineInstr &I, MachineRegisterInfo &MRI,
                             MachineFunction &MF) const;
  bool selectGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI,
                         MachineFunction &MF) const;
  bool selectConstant(MachineInstr &I, MachineRegisterInfo &MRI,
                      MachineFunction &MF) const;
  bool selectTruncOrPtrToInt(MachineInstr &I, MachineRegisterInfo &MRI,
                             MachineFunction &MF) const;
  bool selectZext(MachineInstr &I, MachineRegisterInfo &MRI,
                  MachineFunction &MF) const;
  bool selectAnyext(MachineInstr &I, MachineRegisterInfo &MRI,
                    MachineFunction &MF) const;
  bool selectCmp(MachineInstr &I, MachineRegisterInfo &MRI,
                 MachineFunction &MF) const;
  bool selectFCmp(MachineInstr &I, MachineRegisterInfo &MRI,
                  MachineFunction &MF) const;
  bool selectUadde(MachineInstr &I, MachineRegisterInfo &MRI,
                   MachineFunction &MF) const;
  bool selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI,
                           MachineFunction &MF);
  bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI,
                         MachineFunction &MF);
  bool selectInsert(MachineInstr &I, MachineRegisterInfo &MRI,
                    MachineFunction &MF) const;
  bool selectExtract(MachineInstr &I, MachineRegisterInfo &MRI,
                     MachineFunction &MF) const;
  bool selectCondBranch(MachineInstr &I, MachineRegisterInfo &MRI,
                        MachineFunction &MF) const;
  /// Replace \p I with a plain COPY after constraining both registers to the
  /// given classes; used when src/dst classes are trivially convertible.
  bool selectTurnIntoCOPY(MachineInstr &I, MachineRegisterInfo &MRI,
                          const unsigned DstReg,
                          const TargetRegisterClass *DstRC,
                          const unsigned SrcReg,
                          const TargetRegisterClass *SrcRC) const;
  /// Materialize a G_FCONSTANT, typically via a constant-pool load.
  bool materializeFP(MachineInstr &I, MachineRegisterInfo &MRI,
                     MachineFunction &MF) const;
  bool selectImplicitDefOrPHI(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectDivRem(MachineInstr &I, MachineRegisterInfo &MRI,
                    MachineFunction &MF) const;
  bool selectIntrinsicWSideEffects(MachineInstr &I, MachineRegisterInfo &MRI,
                                   MachineFunction &MF) const;

  // emit insert subreg instruction and insert it before MachineInstr &I
  bool emitInsertSubreg(unsigned DstReg, unsigned SrcReg, MachineInstr &I,
                        MachineRegisterInfo &MRI, MachineFunction &MF) const;
  // emit extract subreg instruction and insert it before MachineInstr &I
  bool emitExtractSubreg(unsigned DstReg, unsigned SrcReg, MachineInstr &I,
                         MachineRegisterInfo &MRI, MachineFunction &MF) const;

  /// Map (type, register bank) to the register class used for selection.
  const TargetRegisterClass *getRegClass(LLT Ty, const RegisterBank &RB) const;
  /// Convenience overload: looks up \p Reg's bank first.
  const TargetRegisterClass *getRegClass(LLT Ty, unsigned Reg,
                                         MachineRegisterInfo &MRI) const;

  const X86TargetMachine &TM;
  const X86Subtarget &STI;
  const X86InstrInfo &TII;
  const X86RegisterInfo &TRI;
  const X86RegisterBankInfo &RBI;

#define GET_GLOBALISEL_PREDICATES_DECL
#include "X86GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "X86GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL
};
145
146 } // end anonymous namespace
147
148 #define GET_GLOBALISEL_IMPL
149 #include "X86GenGlobalISel.inc"
150 #undef GET_GLOBALISEL_IMPL
151
X86InstructionSelector(const X86TargetMachine & TM,const X86Subtarget & STI,const X86RegisterBankInfo & RBI)152 X86InstructionSelector::X86InstructionSelector(const X86TargetMachine &TM,
153 const X86Subtarget &STI,
154 const X86RegisterBankInfo &RBI)
155 : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
156 TRI(*STI.getRegisterInfo()), RBI(RBI),
157 #define GET_GLOBALISEL_PREDICATES_INIT
158 #include "X86GenGlobalISel.inc"
159 #undef GET_GLOBALISEL_PREDICATES_INIT
160 #define GET_GLOBALISEL_TEMPORARIES_INIT
161 #include "X86GenGlobalISel.inc"
162 #undef GET_GLOBALISEL_TEMPORARIES_INIT
163 {
164 }
165
166 // FIXME: This should be target-independent, inferred from the types declared
167 // for each class in the bank.
168 const TargetRegisterClass *
getRegClass(LLT Ty,const RegisterBank & RB) const169 X86InstructionSelector::getRegClass(LLT Ty, const RegisterBank &RB) const {
170 if (RB.getID() == X86::GPRRegBankID) {
171 if (Ty.getSizeInBits() <= 8)
172 return &X86::GR8RegClass;
173 if (Ty.getSizeInBits() == 16)
174 return &X86::GR16RegClass;
175 if (Ty.getSizeInBits() == 32)
176 return &X86::GR32RegClass;
177 if (Ty.getSizeInBits() == 64)
178 return &X86::GR64RegClass;
179 }
180 if (RB.getID() == X86::VECRRegBankID) {
181 if (Ty.getSizeInBits() == 32)
182 return STI.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
183 if (Ty.getSizeInBits() == 64)
184 return STI.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
185 if (Ty.getSizeInBits() == 128)
186 return STI.hasAVX512() ? &X86::VR128XRegClass : &X86::VR128RegClass;
187 if (Ty.getSizeInBits() == 256)
188 return STI.hasAVX512() ? &X86::VR256XRegClass : &X86::VR256RegClass;
189 if (Ty.getSizeInBits() == 512)
190 return &X86::VR512RegClass;
191 }
192
193 llvm_unreachable("Unknown RegBank!");
194 }
195
196 const TargetRegisterClass *
getRegClass(LLT Ty,unsigned Reg,MachineRegisterInfo & MRI) const197 X86InstructionSelector::getRegClass(LLT Ty, unsigned Reg,
198 MachineRegisterInfo &MRI) const {
199 const RegisterBank &RegBank = *RBI.getRegBank(Reg, MRI, TRI);
200 return getRegClass(Ty, RegBank);
201 }
202
getSubRegIndex(const TargetRegisterClass * RC)203 static unsigned getSubRegIndex(const TargetRegisterClass *RC) {
204 unsigned SubIdx = X86::NoSubRegister;
205 if (RC == &X86::GR32RegClass) {
206 SubIdx = X86::sub_32bit;
207 } else if (RC == &X86::GR16RegClass) {
208 SubIdx = X86::sub_16bit;
209 } else if (RC == &X86::GR8RegClass) {
210 SubIdx = X86::sub_8bit;
211 }
212
213 return SubIdx;
214 }
215
getRegClassFromGRPhysReg(unsigned Reg)216 static const TargetRegisterClass *getRegClassFromGRPhysReg(unsigned Reg) {
217 assert(Register::isPhysicalRegister(Reg));
218 if (X86::GR64RegClass.contains(Reg))
219 return &X86::GR64RegClass;
220 if (X86::GR32RegClass.contains(Reg))
221 return &X86::GR32RegClass;
222 if (X86::GR16RegClass.contains(Reg))
223 return &X86::GR16RegClass;
224 if (X86::GR8RegClass.contains(Reg))
225 return &X86::GR8RegClass;
226
227 llvm_unreachable("Unknown RegClass for PhysReg!");
228 }
229
// Set X86 Opcode and constrain DestReg.
// Handles both generic copies between virtual registers and ABI-generated
// copies to/from physical registers that may differ in width.
bool X86InstructionSelector::selectCopy(MachineInstr &I,
                                        MachineRegisterInfo &MRI) const {
  Register DstReg = I.getOperand(0).getReg();
  const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);

  Register SrcReg = I.getOperand(1).getReg();
  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);

  if (Register::isPhysicalRegister(DstReg)) {
    assert(I.isCopy() && "Generic operators do not allow physical registers");

    // Copy of a narrower virtual GPR into a wider physical GPR: widen the
    // source first so the copy's operand classes agree.
    if (DstSize > SrcSize && SrcRegBank.getID() == X86::GPRRegBankID &&
        DstRegBank.getID() == X86::GPRRegBankID) {

      const TargetRegisterClass *SrcRC =
          getRegClass(MRI.getType(SrcReg), SrcRegBank);
      const TargetRegisterClass *DstRC = getRegClassFromGRPhysReg(DstReg);

      if (SrcRC != DstRC) {
        // This case can be generated by ABI lowering; perform an anyext by
        // inserting the source into the low subregister of a wider vreg.
        Register ExtSrc = MRI.createVirtualRegister(DstRC);
        BuildMI(*I.getParent(), I, I.getDebugLoc(),
                TII.get(TargetOpcode::SUBREG_TO_REG))
            .addDef(ExtSrc)
            .addImm(0)
            .addReg(SrcReg)
            .addImm(getSubRegIndex(SrcRC));

        I.getOperand(1).setReg(ExtSrc);
      }
    }

    // Physical destinations need no further constraining.
    return true;
  }

  assert((!Register::isPhysicalRegister(SrcReg) || I.isCopy()) &&
         "No phys reg on generic operators");
  assert((DstSize == SrcSize ||
          // Copies are a mean to setup initial types, the number of
          // bits may not exactly match.
          (Register::isPhysicalRegister(SrcReg) &&
           DstSize <= RBI.getSizeInBits(SrcReg, MRI, TRI))) &&
         "Copy with different width?!");

  const TargetRegisterClass *DstRC =
      getRegClass(MRI.getType(DstReg), DstRegBank);

  if (SrcRegBank.getID() == X86::GPRRegBankID &&
      DstRegBank.getID() == X86::GPRRegBankID && SrcSize > DstSize &&
      Register::isPhysicalRegister(SrcReg)) {
    // Change the physical register operand to perform a truncate: read the
    // appropriate narrow subregister of the wide physical source instead.

    const TargetRegisterClass *SrcRC = getRegClassFromGRPhysReg(SrcReg);

    if (DstRC != SrcRC) {
      I.getOperand(1).setSubReg(getSubRegIndex(DstRC));
      I.getOperand(1).substPhysReg(SrcReg, TRI);
    }
  }

  // No need to constrain SrcReg. It will get constrained when
  // we hit another of its use or its defs.
  // Copies do not have constraints.
  const TargetRegisterClass *OldRC = MRI.getRegClassOrNull(DstReg);
  if (!OldRC || !DstRC->hasSubClassEq(OldRC)) {
    if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
      LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                        << " operand\n");
      return false;
    }
  }
  I.setDesc(TII.get(X86::COPY));
  return true;
}
307
/// Top-level selection entry point: try the TableGen-erated selector first,
/// then fall back to the hand-written per-opcode routines below.
bool X86InstructionSelector::select(MachineInstr &I) {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");

  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  unsigned Opcode = I.getOpcode();
  if (!isPreISelGenericOpcode(Opcode)) {
    // Certain non-generic instructions also need some special handling.

    // LOAD_STACK_GUARD is not supported; reject it here.
    if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
      return false;

    if (I.isCopy())
      return selectCopy(I, MRI);

    // Already-target instructions pass through unchanged.
    return true;
  }

  assert(I.getNumOperands() == I.getNumExplicitOperands() &&
         "Generic instruction has unexpected implicit operands\n");

  // Give the generated patterns the first shot at the instruction.
  if (selectImpl(I, *CoverageInfo))
    return true;

  LLVM_DEBUG(dbgs() << " C++ instruction selection: "; I.print(dbgs()));

  // TODO: This should be implemented by tblgen.
  switch (I.getOpcode()) {
  default:
    return false;
  case TargetOpcode::G_STORE:
  case TargetOpcode::G_LOAD:
    return selectLoadStoreOp(I, MRI, MF);
  case TargetOpcode::G_PTR_ADD:
  case TargetOpcode::G_FRAME_INDEX:
    return selectFrameIndexOrGep(I, MRI, MF);
  case TargetOpcode::G_GLOBAL_VALUE:
    return selectGlobalValue(I, MRI, MF);
  case TargetOpcode::G_CONSTANT:
    return selectConstant(I, MRI, MF);
  case TargetOpcode::G_FCONSTANT:
    return materializeFP(I, MRI, MF);
  case TargetOpcode::G_PTRTOINT:
  case TargetOpcode::G_TRUNC:
    return selectTruncOrPtrToInt(I, MRI, MF);
  case TargetOpcode::G_INTTOPTR:
    // An inttoptr is a no-op at the register level; select it as a copy.
    return selectCopy(I, MRI);
  case TargetOpcode::G_ZEXT:
    return selectZext(I, MRI, MF);
  case TargetOpcode::G_ANYEXT:
    return selectAnyext(I, MRI, MF);
  case TargetOpcode::G_ICMP:
    return selectCmp(I, MRI, MF);
  case TargetOpcode::G_FCMP:
    return selectFCmp(I, MRI, MF);
  case TargetOpcode::G_UADDE:
    return selectUadde(I, MRI, MF);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectUnmergeValues(I, MRI, MF);
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectMergeValues(I, MRI, MF);
  case TargetOpcode::G_EXTRACT:
    return selectExtract(I, MRI, MF);
  case TargetOpcode::G_INSERT:
    return selectInsert(I, MRI, MF);
  case TargetOpcode::G_BRCOND:
    return selectCondBranch(I, MRI, MF);
  case TargetOpcode::G_IMPLICIT_DEF:
  case TargetOpcode::G_PHI:
    return selectImplicitDefOrPHI(I, MRI);
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
    return selectDivRem(I, MRI, MF);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    return selectIntrinsicWSideEffects(I, MRI, MF);
  }

  return false;
}
393
getLoadStoreOp(const LLT & Ty,const RegisterBank & RB,unsigned Opc,uint64_t Alignment) const394 unsigned X86InstructionSelector::getLoadStoreOp(const LLT &Ty,
395 const RegisterBank &RB,
396 unsigned Opc,
397 uint64_t Alignment) const {
398 bool Isload = (Opc == TargetOpcode::G_LOAD);
399 bool HasAVX = STI.hasAVX();
400 bool HasAVX512 = STI.hasAVX512();
401 bool HasVLX = STI.hasVLX();
402
403 if (Ty == LLT::scalar(8)) {
404 if (X86::GPRRegBankID == RB.getID())
405 return Isload ? X86::MOV8rm : X86::MOV8mr;
406 } else if (Ty == LLT::scalar(16)) {
407 if (X86::GPRRegBankID == RB.getID())
408 return Isload ? X86::MOV16rm : X86::MOV16mr;
409 } else if (Ty == LLT::scalar(32) || Ty == LLT::pointer(0, 32)) {
410 if (X86::GPRRegBankID == RB.getID())
411 return Isload ? X86::MOV32rm : X86::MOV32mr;
412 if (X86::VECRRegBankID == RB.getID())
413 return Isload ? (HasAVX512 ? X86::VMOVSSZrm_alt :
414 HasAVX ? X86::VMOVSSrm_alt :
415 X86::MOVSSrm_alt)
416 : (HasAVX512 ? X86::VMOVSSZmr :
417 HasAVX ? X86::VMOVSSmr :
418 X86::MOVSSmr);
419 } else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) {
420 if (X86::GPRRegBankID == RB.getID())
421 return Isload ? X86::MOV64rm : X86::MOV64mr;
422 if (X86::VECRRegBankID == RB.getID())
423 return Isload ? (HasAVX512 ? X86::VMOVSDZrm_alt :
424 HasAVX ? X86::VMOVSDrm_alt :
425 X86::MOVSDrm_alt)
426 : (HasAVX512 ? X86::VMOVSDZmr :
427 HasAVX ? X86::VMOVSDmr :
428 X86::MOVSDmr);
429 } else if (Ty.isVector() && Ty.getSizeInBits() == 128) {
430 if (Alignment >= 16)
431 return Isload ? (HasVLX ? X86::VMOVAPSZ128rm
432 : HasAVX512
433 ? X86::VMOVAPSZ128rm_NOVLX
434 : HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm)
435 : (HasVLX ? X86::VMOVAPSZ128mr
436 : HasAVX512
437 ? X86::VMOVAPSZ128mr_NOVLX
438 : HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr);
439 else
440 return Isload ? (HasVLX ? X86::VMOVUPSZ128rm
441 : HasAVX512
442 ? X86::VMOVUPSZ128rm_NOVLX
443 : HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm)
444 : (HasVLX ? X86::VMOVUPSZ128mr
445 : HasAVX512
446 ? X86::VMOVUPSZ128mr_NOVLX
447 : HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr);
448 } else if (Ty.isVector() && Ty.getSizeInBits() == 256) {
449 if (Alignment >= 32)
450 return Isload ? (HasVLX ? X86::VMOVAPSZ256rm
451 : HasAVX512 ? X86::VMOVAPSZ256rm_NOVLX
452 : X86::VMOVAPSYrm)
453 : (HasVLX ? X86::VMOVAPSZ256mr
454 : HasAVX512 ? X86::VMOVAPSZ256mr_NOVLX
455 : X86::VMOVAPSYmr);
456 else
457 return Isload ? (HasVLX ? X86::VMOVUPSZ256rm
458 : HasAVX512 ? X86::VMOVUPSZ256rm_NOVLX
459 : X86::VMOVUPSYrm)
460 : (HasVLX ? X86::VMOVUPSZ256mr
461 : HasAVX512 ? X86::VMOVUPSZ256mr_NOVLX
462 : X86::VMOVUPSYmr);
463 } else if (Ty.isVector() && Ty.getSizeInBits() == 512) {
464 if (Alignment >= 64)
465 return Isload ? X86::VMOVAPSZrm : X86::VMOVAPSZmr;
466 else
467 return Isload ? X86::VMOVUPSZrm : X86::VMOVUPSZmr;
468 }
469 return Opc;
470 }
471
472 // Fill in an address from the given instruction.
X86SelectAddress(const MachineInstr & I,const MachineRegisterInfo & MRI,X86AddressMode & AM)473 static void X86SelectAddress(const MachineInstr &I,
474 const MachineRegisterInfo &MRI,
475 X86AddressMode &AM) {
476 assert(I.getOperand(0).isReg() && "unsupported opperand.");
477 assert(MRI.getType(I.getOperand(0).getReg()).isPointer() &&
478 "unsupported type.");
479
480 if (I.getOpcode() == TargetOpcode::G_PTR_ADD) {
481 if (auto COff = getConstantVRegVal(I.getOperand(2).getReg(), MRI)) {
482 int64_t Imm = *COff;
483 if (isInt<32>(Imm)) { // Check for displacement overflow.
484 AM.Disp = static_cast<int32_t>(Imm);
485 AM.Base.Reg = I.getOperand(1).getReg();
486 return;
487 }
488 }
489 } else if (I.getOpcode() == TargetOpcode::G_FRAME_INDEX) {
490 AM.Base.FrameIndex = I.getOperand(1).getIndex();
491 AM.BaseType = X86AddressMode::FrameIndexBase;
492 return;
493 }
494
495 // Default behavior.
496 AM.Base.Reg = I.getOperand(0).getReg();
497 }
498
/// Select a G_LOAD or G_STORE into a concrete X86 move with a full address
/// operand, folding the address-producing instruction when possible.
bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I,
                                               MachineRegisterInfo &MRI,
                                               MachineFunction &MF) const {
  unsigned Opc = I.getOpcode();

  assert((Opc == TargetOpcode::G_STORE || Opc == TargetOpcode::G_LOAD) &&
         "unexpected instruction");

  const Register DefReg = I.getOperand(0).getReg();
  LLT Ty = MRI.getType(DefReg);
  const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);

  assert(I.hasOneMemOperand());
  auto &MemOp = **I.memoperands_begin();
  if (MemOp.isAtomic()) {
    // Note: for unordered operations, we rely on the fact the appropriate MMO
    // is already on the instruction we're mutating, and thus we don't need to
    // make any changes. So long as we select an opcode which is capable of
    // loading or storing the appropriate size atomically, the rest of the
    // backend is required to respect the MMO state.
    if (!MemOp.isUnordered()) {
      LLVM_DEBUG(dbgs() << "Atomic ordering not supported yet\n");
      return false;
    }
    // Naturally-aligned accesses are atomic; anything less is not handled.
    if (MemOp.getAlignment() < Ty.getSizeInBits()/8) {
      LLVM_DEBUG(dbgs() << "Unaligned atomics not supported yet\n");
      return false;
    }
  }

  unsigned NewOpc = getLoadStoreOp(Ty, RB, Opc, MemOp.getAlignment());
  if (NewOpc == Opc)
    return false;

  // Derive the addressing mode from the instruction defining the pointer.
  X86AddressMode AM;
  X86SelectAddress(*MRI.getVRegDef(I.getOperand(1).getReg()), MRI, AM);

  I.setDesc(TII.get(NewOpc));
  MachineInstrBuilder MIB(MF, I);
  if (Opc == TargetOpcode::G_LOAD) {
    // Replace the pointer operand with the expanded address operands.
    I.RemoveOperand(1);
    addFullAddress(MIB, AM);
  } else {
    // G_STORE (VAL, Addr), X86Store instruction (Addr, VAL):
    // drop both operands, then re-add address first and value last.
    I.RemoveOperand(1);
    I.RemoveOperand(0);
    addFullAddress(MIB, AM).addUse(DefReg);
  }
  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
549
getLeaOP(LLT Ty,const X86Subtarget & STI)550 static unsigned getLeaOP(LLT Ty, const X86Subtarget &STI) {
551 if (Ty == LLT::pointer(0, 64))
552 return X86::LEA64r;
553 else if (Ty == LLT::pointer(0, 32))
554 return STI.isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r;
555 else
556 llvm_unreachable("Can't get LEA opcode. Unsupported type.");
557 }
558
/// Select G_FRAME_INDEX and G_PTR_ADD into an LEA computing the address.
bool X86InstructionSelector::selectFrameIndexOrGep(MachineInstr &I,
                                                   MachineRegisterInfo &MRI,
                                                   MachineFunction &MF) const {
  unsigned Opc = I.getOpcode();

  assert((Opc == TargetOpcode::G_FRAME_INDEX || Opc == TargetOpcode::G_PTR_ADD) &&
         "unexpected instruction");

  const Register DefReg = I.getOperand(0).getReg();
  LLT Ty = MRI.getType(DefReg);

  // Use LEA to calculate frame index and GEP
  unsigned NewOpc = getLeaOP(Ty, STI);
  I.setDesc(TII.get(NewOpc));
  MachineInstrBuilder MIB(MF, I);

  if (Opc == TargetOpcode::G_FRAME_INDEX) {
    // Frame index becomes the base; addOffset fills scale/index/disp/segment.
    addOffset(MIB, 0);
  } else {
    // Rebuild the G_PTR_ADD operands into LEA's base,scale,index,disp,segment
    // layout in place: copy the offset operand to the index slot at the end,
    // turn the original slot into the scale immediate (1), then append
    // displacement 0 and no segment register.
    MachineOperand &InxOp = I.getOperand(2);
    I.addOperand(InxOp); // set IndexReg
    InxOp.ChangeToImmediate(1); // set Scale
    MIB.addImm(0).addReg(0);
  }

  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
586
/// Select G_GLOBAL_VALUE into an LEA of the global's address.
/// Bails out on TLS globals, non-small code models, and references that
/// require a GOT stub or PIC-base-relative addressing.
bool X86InstructionSelector::selectGlobalValue(MachineInstr &I,
                                               MachineRegisterInfo &MRI,
                                               MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_GLOBAL_VALUE) &&
         "unexpected instruction");

  auto GV = I.getOperand(1).getGlobal();
  if (GV->isThreadLocal()) {
    return false; // TODO: we don't support TLS yet.
  }

  // Can't handle alternate code models yet.
  if (TM.getCodeModel() != CodeModel::Small)
    return false;

  X86AddressMode AM;
  AM.GV = GV;
  AM.GVOpFlags = STI.classifyGlobalReference(GV);

  // TODO: The ABI requires an extra load. not supported yet.
  if (isGlobalStubReference(AM.GVOpFlags))
    return false;

  // TODO: This reference is relative to the pic base. not supported yet.
  if (isGlobalRelativeToPICBase(AM.GVOpFlags))
    return false;

  if (STI.isPICStyleRIPRel()) {
    // Use rip-relative addressing.
    assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
    AM.Base.Reg = X86::RIP;
  }

  const Register DefReg = I.getOperand(0).getReg();
  LLT Ty = MRI.getType(DefReg);
  unsigned NewOpc = getLeaOP(Ty, STI);

  I.setDesc(TII.get(NewOpc));
  MachineInstrBuilder MIB(MF, I);

  // Replace the global operand with the full address-mode operand list.
  I.RemoveOperand(1);
  addFullAddress(MIB, AM);

  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
632
selectConstant(MachineInstr & I,MachineRegisterInfo & MRI,MachineFunction & MF) const633 bool X86InstructionSelector::selectConstant(MachineInstr &I,
634 MachineRegisterInfo &MRI,
635 MachineFunction &MF) const {
636 assert((I.getOpcode() == TargetOpcode::G_CONSTANT) &&
637 "unexpected instruction");
638
639 const Register DefReg = I.getOperand(0).getReg();
640 LLT Ty = MRI.getType(DefReg);
641
642 if (RBI.getRegBank(DefReg, MRI, TRI)->getID() != X86::GPRRegBankID)
643 return false;
644
645 uint64_t Val = 0;
646 if (I.getOperand(1).isCImm()) {
647 Val = I.getOperand(1).getCImm()->getZExtValue();
648 I.getOperand(1).ChangeToImmediate(Val);
649 } else if (I.getOperand(1).isImm()) {
650 Val = I.getOperand(1).getImm();
651 } else
652 llvm_unreachable("Unsupported operand type.");
653
654 unsigned NewOpc;
655 switch (Ty.getSizeInBits()) {
656 case 8:
657 NewOpc = X86::MOV8ri;
658 break;
659 case 16:
660 NewOpc = X86::MOV16ri;
661 break;
662 case 32:
663 NewOpc = X86::MOV32ri;
664 break;
665 case 64:
666 // TODO: in case isUInt<32>(Val), X86::MOV32ri can be used
667 if (isInt<32>(Val))
668 NewOpc = X86::MOV64ri32;
669 else
670 NewOpc = X86::MOV64ri;
671 break;
672 default:
673 llvm_unreachable("Can't select G_CONSTANT, unsupported type.");
674 }
675
676 I.setDesc(TII.get(NewOpc));
677 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
678 }
679
680 // Helper function for selectTruncOrPtrToInt and selectAnyext.
681 // Returns true if DstRC lives on a floating register class and
682 // SrcRC lives on a 128-bit vector class.
canTurnIntoCOPY(const TargetRegisterClass * DstRC,const TargetRegisterClass * SrcRC)683 static bool canTurnIntoCOPY(const TargetRegisterClass *DstRC,
684 const TargetRegisterClass *SrcRC) {
685 return (DstRC == &X86::FR32RegClass || DstRC == &X86::FR32XRegClass ||
686 DstRC == &X86::FR64RegClass || DstRC == &X86::FR64XRegClass) &&
687 (SrcRC == &X86::VR128RegClass || SrcRC == &X86::VR128XRegClass);
688 }
689
selectTurnIntoCOPY(MachineInstr & I,MachineRegisterInfo & MRI,const unsigned DstReg,const TargetRegisterClass * DstRC,const unsigned SrcReg,const TargetRegisterClass * SrcRC) const690 bool X86InstructionSelector::selectTurnIntoCOPY(
691 MachineInstr &I, MachineRegisterInfo &MRI, const unsigned DstReg,
692 const TargetRegisterClass *DstRC, const unsigned SrcReg,
693 const TargetRegisterClass *SrcRC) const {
694
695 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
696 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
697 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
698 << " operand\n");
699 return false;
700 }
701 I.setDesc(TII.get(X86::COPY));
702 return true;
703 }
704
/// Select G_TRUNC and G_PTRTOINT. On GPRs this becomes a subregister copy;
/// vector-to-FP truncations that are representable as moves become COPYs.
bool X86InstructionSelector::selectTruncOrPtrToInt(MachineInstr &I,
                                                   MachineRegisterInfo &MRI,
                                                   MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_TRUNC ||
          I.getOpcode() == TargetOpcode::G_PTRTOINT) &&
         "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);

  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);

  // Cross-bank truncation is not handled here.
  if (DstRB.getID() != SrcRB.getID()) {
    LLVM_DEBUG(dbgs() << TII.getName(I.getOpcode())
                      << " input/output on different banks\n");
    return false;
  }

  const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB);
  const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcRB);

  if (!DstRC || !SrcRC)
    return false;

  // If that's truncation of the value that lives on the vector class and goes
  // into the floating class, just replace it with copy, as we are able to
  // select it as a regular move.
  if (canTurnIntoCOPY(DstRC, SrcRC))
    return selectTurnIntoCOPY(I, MRI, DstReg, DstRC, SrcReg, SrcRC);

  if (DstRB.getID() != X86::GPRRegBankID)
    return false;

  // Pick the subregister index matching the destination width.
  unsigned SubIdx;
  if (DstRC == SrcRC) {
    // Nothing to be done
    SubIdx = X86::NoSubRegister;
  } else if (DstRC == &X86::GR32RegClass) {
    SubIdx = X86::sub_32bit;
  } else if (DstRC == &X86::GR16RegClass) {
    SubIdx = X86::sub_16bit;
  } else if (DstRC == &X86::GR8RegClass) {
    SubIdx = X86::sub_8bit;
  } else {
    return false;
  }

  // The source must come from a class that actually has that subregister.
  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubIdx);

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << "\n");
    return false;
  }

  // Read only the low subregister of the source: COPY dst, src.subidx.
  I.getOperand(1).setSubReg(SubIdx);

  I.setDesc(TII.get(X86::COPY));
  return true;
}
770
/// Select G_ZEXT. Wide extensions are handled via a MOVZX and/or
/// SUBREG_TO_REG (table below); an s1 source is extended by masking
/// with AND 1. The 8/16 => 32 cases are already covered by TableGen.
bool X86InstructionSelector::selectZext(MachineInstr &I,
                                        MachineRegisterInfo &MRI,
                                        MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_ZEXT) && "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);

  assert(!(SrcTy == LLT::scalar(8) && DstTy == LLT::scalar(32)) &&
         "8=>32 Zext is handled by tablegen");
  assert(!(SrcTy == LLT::scalar(16) && DstTy == LLT::scalar(32)) &&
         "16=>32 Zext is handled by tablegen");

  // (src, dst) => (MOVZX opcode or 0, whether a SUBREG_TO_REG is needed).
  // The *=>i64 entries zero-extend into a 32-bit register first, relying on
  // the implicit zeroing of the upper 32 bits, then widen via SUBREG_TO_REG.
  const static struct ZextEntry {
    LLT SrcTy;
    LLT DstTy;
    unsigned MovOp;
    bool NeedSubregToReg;
  } OpTable[] = {
      {LLT::scalar(8), LLT::scalar(16), X86::MOVZX16rr8, false},  // i8  => i16
      {LLT::scalar(8), LLT::scalar(64), X86::MOVZX32rr8, true},   // i8  => i64
      {LLT::scalar(16), LLT::scalar(64), X86::MOVZX32rr16, true}, // i16 => i64
      {LLT::scalar(32), LLT::scalar(64), 0, true}                 // i32 => i64
  };

  auto ZextEntryIt =
      std::find_if(std::begin(OpTable), std::end(OpTable),
                   [SrcTy, DstTy](const ZextEntry &El) {
                     return El.DstTy == DstTy && El.SrcTy == SrcTy;
                   });

  // Here we try to select Zext into a MOVZ and/or SUBREG_TO_REG instruction.
  if (ZextEntryIt != std::end(OpTable)) {
    const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
    const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
    const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB);
    const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcRB);

    if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
        !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
      LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                        << " operand\n");
      return false;
    }

    unsigned TransitRegTo = DstReg;
    unsigned TransitRegFrom = SrcReg;
    if (ZextEntryIt->MovOp) {
      // If we select Zext into MOVZ + SUBREG_TO_REG, we need to have
      // a transit register in between: create it here.
      if (ZextEntryIt->NeedSubregToReg) {
        TransitRegFrom = MRI.createVirtualRegister(
            getRegClass(LLT::scalar(32), DstReg, MRI));
        TransitRegTo = TransitRegFrom;
      }

      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(ZextEntryIt->MovOp))
          .addDef(TransitRegTo)
          .addReg(SrcReg);
    }
    if (ZextEntryIt->NeedSubregToReg) {
      BuildMI(*I.getParent(), I, I.getDebugLoc(),
              TII.get(TargetOpcode::SUBREG_TO_REG))
          .addDef(DstReg)
          .addImm(0)
          .addReg(TransitRegFrom)
          .addImm(X86::sub_32bit);
    }
    I.eraseFromParent();
    return true;
  }

  // Beyond the table, only s1 sources are handled (by masking).
  if (SrcTy != LLT::scalar(1))
    return false;

  unsigned AndOpc;
  if (DstTy == LLT::scalar(8))
    AndOpc = X86::AND8ri;
  else if (DstTy == LLT::scalar(16))
    AndOpc = X86::AND16ri8;
  else if (DstTy == LLT::scalar(32))
    AndOpc = X86::AND32ri8;
  else if (DstTy == LLT::scalar(64))
    AndOpc = X86::AND64ri8;
  else
    return false;

  // Widen the s1 (held in an 8-bit register) to the destination width first,
  // then mask off everything but bit 0.
  unsigned DefReg = SrcReg;
  if (DstTy != LLT::scalar(8)) {
    DefReg = MRI.createVirtualRegister(getRegClass(DstTy, DstReg, MRI));
    BuildMI(*I.getParent(), I, I.getDebugLoc(),
            TII.get(TargetOpcode::SUBREG_TO_REG), DefReg)
        .addImm(0)
        .addReg(SrcReg)
        .addImm(X86::sub_8bit);
  }

  MachineInstr &AndInst =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AndOpc), DstReg)
           .addReg(DefReg)
           .addImm(1);

  constrainSelectedInstRegOperands(AndInst, TII, TRI, RBI);

  I.eraseFromParent();
  return true;
}
881
// Select G_ANYEXT: widen SrcReg into DstReg with undefined upper bits.
// Handles FPR->VECR by plain COPY, and GPR widening via SUBREG_TO_REG.
bool X86InstructionSelector::selectAnyext(MachineInstr &I,
                                          MachineRegisterInfo &MRI,
                                          MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_ANYEXT) && "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);

  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);

  assert(DstRB.getID() == SrcRB.getID() &&
         "G_ANYEXT input/output on different banks\n");

  assert(DstTy.getSizeInBits() > SrcTy.getSizeInBits() &&
         "G_ANYEXT incorrect operand size");

  const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB);
  const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcRB);

  // If that's ANY_EXT of the value that lives on the floating class and goes
  // into the vector class, just replace it with copy, as we are able to select
  // it as a regular move.
  if (canTurnIntoCOPY(SrcRC, DstRC))
    return selectTurnIntoCOPY(I, MRI, SrcReg, SrcRC, DstReg, DstRC);

  // Beyond the COPY case above, only GPR any-extensions are handled here.
  if (DstRB.getID() != X86::GPRRegBankID)
    return false;

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }

  // Same register class on both sides: the any-extend degenerates to a
  // register copy (upper bits are undefined anyway).
  if (SrcRC == DstRC) {
    I.setDesc(TII.get(X86::COPY));
    return true;
  }

  // Otherwise place the narrow value into the matching subregister of an
  // otherwise-undefined wide register.
  BuildMI(*I.getParent(), I, I.getDebugLoc(),
          TII.get(TargetOpcode::SUBREG_TO_REG))
      .addDef(DstReg)
      .addImm(0)
      .addReg(SrcReg)
      .addImm(getSubRegIndex(SrcRC));

  I.eraseFromParent();
  return true;
}
936
selectCmp(MachineInstr & I,MachineRegisterInfo & MRI,MachineFunction & MF) const937 bool X86InstructionSelector::selectCmp(MachineInstr &I,
938 MachineRegisterInfo &MRI,
939 MachineFunction &MF) const {
940 assert((I.getOpcode() == TargetOpcode::G_ICMP) && "unexpected instruction");
941
942 X86::CondCode CC;
943 bool SwapArgs;
944 std::tie(CC, SwapArgs) = X86::getX86ConditionCode(
945 (CmpInst::Predicate)I.getOperand(1).getPredicate());
946
947 Register LHS = I.getOperand(2).getReg();
948 Register RHS = I.getOperand(3).getReg();
949
950 if (SwapArgs)
951 std::swap(LHS, RHS);
952
953 unsigned OpCmp;
954 LLT Ty = MRI.getType(LHS);
955
956 switch (Ty.getSizeInBits()) {
957 default:
958 return false;
959 case 8:
960 OpCmp = X86::CMP8rr;
961 break;
962 case 16:
963 OpCmp = X86::CMP16rr;
964 break;
965 case 32:
966 OpCmp = X86::CMP32rr;
967 break;
968 case 64:
969 OpCmp = X86::CMP64rr;
970 break;
971 }
972
973 MachineInstr &CmpInst =
974 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpCmp))
975 .addReg(LHS)
976 .addReg(RHS);
977
978 MachineInstr &SetInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
979 TII.get(X86::SETCCr), I.getOperand(0).getReg()).addImm(CC);
980
981 constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI);
982 constrainSelectedInstRegOperands(SetInst, TII, TRI, RBI);
983
984 I.eraseFromParent();
985 return true;
986 }
987
// Select G_FCMP using UCOMISS/UCOMISD plus SETCC.
// FCMP_OEQ / FCMP_UNE need two SETCCs combined with AND/OR; every other
// predicate maps to a single condition code (possibly after swapping the
// operands).
bool X86InstructionSelector::selectFCmp(MachineInstr &I,
                                        MachineRegisterInfo &MRI,
                                        MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_FCMP) && "unexpected instruction");

  Register LhsReg = I.getOperand(2).getReg();
  Register RhsReg = I.getOperand(3).getReg();
  CmpInst::Predicate Predicate =
      (CmpInst::Predicate)I.getOperand(1).getPredicate();

  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
  // Row 0: OEQ = SETE & SETNP; row 1: UNE = SETNE | SETP. The third column
  // is the 8-bit opcode used to combine the two flag results.
  static const uint16_t SETFOpcTable[2][3] = {
      {X86::COND_E, X86::COND_NP, X86::AND8rr},
      {X86::COND_NE, X86::COND_P, X86::OR8rr}};
  const uint16_t *SETFOpc = nullptr;
  switch (Predicate) {
  default:
    break;
  case CmpInst::FCMP_OEQ:
    SETFOpc = &SETFOpcTable[0][0];
    break;
  case CmpInst::FCMP_UNE:
    SETFOpc = &SETFOpcTable[1][0];
    break;
  }

  // Compute the opcode for the CMP instruction.
  unsigned OpCmp;
  LLT Ty = MRI.getType(LhsReg);
  switch (Ty.getSizeInBits()) {
  default:
    return false;
  case 32:
    OpCmp = X86::UCOMISSrr;
    break;
  case 64:
    OpCmp = X86::UCOMISDrr;
    break;
  }

  // The boolean result always lives in an 8-bit register.
  Register ResultReg = I.getOperand(0).getReg();
  RBI.constrainGenericRegister(
      ResultReg,
      *getRegClass(LLT::scalar(8), *RBI.getRegBank(ResultReg, MRI, TRI)), MRI);
  if (SETFOpc) {
    // Two-condition case: compare once, materialize both flag results,
    // then combine them with AND8rr/OR8rr from the table.
    MachineInstr &CmpInst =
        *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpCmp))
             .addReg(LhsReg)
             .addReg(RhsReg);

    Register FlagReg1 = MRI.createVirtualRegister(&X86::GR8RegClass);
    Register FlagReg2 = MRI.createVirtualRegister(&X86::GR8RegClass);
    MachineInstr &Set1 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                  TII.get(X86::SETCCr), FlagReg1).addImm(SETFOpc[0]);
    MachineInstr &Set2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                  TII.get(X86::SETCCr), FlagReg2).addImm(SETFOpc[1]);
    MachineInstr &Set3 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                  TII.get(SETFOpc[2]), ResultReg)
                              .addReg(FlagReg1)
                              .addReg(FlagReg2);
    constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI);
    constrainSelectedInstRegOperands(Set1, TII, TRI, RBI);
    constrainSelectedInstRegOperands(Set2, TII, TRI, RBI);
    constrainSelectedInstRegOperands(Set3, TII, TRI, RBI);

    I.eraseFromParent();
    return true;
  }

  // Single-condition case.
  X86::CondCode CC;
  bool SwapArgs;
  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");

  if (SwapArgs)
    std::swap(LhsReg, RhsReg);

  // Emit a compare of LHS/RHS.
  MachineInstr &CmpInst =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpCmp))
           .addReg(LhsReg)
           .addReg(RhsReg);

  MachineInstr &Set =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::SETCCr), ResultReg).addImm(CC);
  constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI);
  constrainSelectedInstRegOperands(Set, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}
1078
// Select G_UADDE (add with carry-in, producing sum + carry-out).
// Only s32 is supported. The carry-in must either originate from a previous
// G_UADDE (reuse EFLAGS via ADC32rr) or be the constant 0 (plain ADD32rr).
bool X86InstructionSelector::selectUadde(MachineInstr &I,
                                         MachineRegisterInfo &MRI,
                                         MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_UADDE) && "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register CarryOutReg = I.getOperand(1).getReg();
  const Register Op0Reg = I.getOperand(2).getReg();
  const Register Op1Reg = I.getOperand(3).getReg();
  Register CarryInReg = I.getOperand(4).getReg();

  const LLT DstTy = MRI.getType(DstReg);

  if (DstTy != LLT::scalar(32))
    return false;

  // find CarryIn def instruction, looking through any G_TRUNCs.
  MachineInstr *Def = MRI.getVRegDef(CarryInReg);
  while (Def->getOpcode() == TargetOpcode::G_TRUNC) {
    CarryInReg = Def->getOperand(1).getReg();
    Def = MRI.getVRegDef(CarryInReg);
  }

  unsigned Opcode;
  if (Def->getOpcode() == TargetOpcode::G_UADDE) {
    // carry set by prev ADD: restore the saved carry into EFLAGS so the
    // following ADC consumes it.
    BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), X86::EFLAGS)
        .addReg(CarryInReg);

    if (!RBI.constrainGenericRegister(CarryInReg, X86::GR32RegClass, MRI))
      return false;

    Opcode = X86::ADC32rr;
  } else if (auto val = getConstantVRegVal(CarryInReg, MRI)) {
    // carry is constant, support only 0.
    if (*val != 0)
      return false;

    Opcode = X86::ADD32rr;
  } else
    return false;

  MachineInstr &AddInst =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opcode), DstReg)
           .addReg(Op0Reg)
           .addReg(Op1Reg);

  // Capture the produced carry by copying EFLAGS into the carry-out vreg.
  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), CarryOutReg)
      .addReg(X86::EFLAGS);

  if (!constrainSelectedInstRegOperands(AddInst, TII, TRI, RBI) ||
      !RBI.constrainGenericRegister(CarryOutReg, X86::GR32RegClass, MRI))
    return false;

  I.eraseFromParent();
  return true;
}
1137
// Select G_EXTRACT of a subvector. Index 0 becomes a subregister copy;
// non-zero indices use VEXTRACT* instructions, gated on AVX/AVX512/VLX.
bool X86InstructionSelector::selectExtract(MachineInstr &I,
                                           MachineRegisterInfo &MRI,
                                           MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_EXTRACT) &&
         "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();
  int64_t Index = I.getOperand(2).getImm();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);

  // Meanwile handle vector type only.
  if (!DstTy.isVector())
    return false;

  // The bit index must be a multiple of the destination width, i.e. an
  // aligned subvector extraction.
  if (Index % DstTy.getSizeInBits() != 0)
    return false; // Not extract subvector.

  if (Index == 0) {
    // Replace by extract subreg copy.
    if (!emitExtractSubreg(DstReg, SrcReg, I, MRI, MF))
      return false;

    I.eraseFromParent();
    return true;
  }

  bool HasAVX = STI.hasAVX();
  bool HasAVX512 = STI.hasAVX512();
  bool HasVLX = STI.hasVLX();

  // Choose the VEXTRACT flavor by source/destination width and the
  // available subtarget features.
  if (SrcTy.getSizeInBits() == 256 && DstTy.getSizeInBits() == 128) {
    if (HasVLX)
      I.setDesc(TII.get(X86::VEXTRACTF32x4Z256rr));
    else if (HasAVX)
      I.setDesc(TII.get(X86::VEXTRACTF128rr));
    else
      return false;
  } else if (SrcTy.getSizeInBits() == 512 && HasAVX512) {
    if (DstTy.getSizeInBits() == 128)
      I.setDesc(TII.get(X86::VEXTRACTF32x4Zrr));
    else if (DstTy.getSizeInBits() == 256)
      I.setDesc(TII.get(X86::VEXTRACTF64x4Zrr));
    else
      return false;
  } else
    return false;

  // Convert to X86 VEXTRACT immediate: a subvector index, not a bit offset.
  Index = Index / DstTy.getSizeInBits();
  I.getOperand(2).setImm(Index);

  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
1194
emitExtractSubreg(unsigned DstReg,unsigned SrcReg,MachineInstr & I,MachineRegisterInfo & MRI,MachineFunction & MF) const1195 bool X86InstructionSelector::emitExtractSubreg(unsigned DstReg, unsigned SrcReg,
1196 MachineInstr &I,
1197 MachineRegisterInfo &MRI,
1198 MachineFunction &MF) const {
1199 const LLT DstTy = MRI.getType(DstReg);
1200 const LLT SrcTy = MRI.getType(SrcReg);
1201 unsigned SubIdx = X86::NoSubRegister;
1202
1203 if (!DstTy.isVector() || !SrcTy.isVector())
1204 return false;
1205
1206 assert(SrcTy.getSizeInBits() > DstTy.getSizeInBits() &&
1207 "Incorrect Src/Dst register size");
1208
1209 if (DstTy.getSizeInBits() == 128)
1210 SubIdx = X86::sub_xmm;
1211 else if (DstTy.getSizeInBits() == 256)
1212 SubIdx = X86::sub_ymm;
1213 else
1214 return false;
1215
1216 const TargetRegisterClass *DstRC = getRegClass(DstTy, DstReg, MRI);
1217 const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcReg, MRI);
1218
1219 SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubIdx);
1220
1221 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
1222 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1223 LLVM_DEBUG(dbgs() << "Failed to constrain EXTRACT_SUBREG\n");
1224 return false;
1225 }
1226
1227 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), DstReg)
1228 .addReg(SrcReg, 0, SubIdx);
1229
1230 return true;
1231 }
1232
emitInsertSubreg(unsigned DstReg,unsigned SrcReg,MachineInstr & I,MachineRegisterInfo & MRI,MachineFunction & MF) const1233 bool X86InstructionSelector::emitInsertSubreg(unsigned DstReg, unsigned SrcReg,
1234 MachineInstr &I,
1235 MachineRegisterInfo &MRI,
1236 MachineFunction &MF) const {
1237 const LLT DstTy = MRI.getType(DstReg);
1238 const LLT SrcTy = MRI.getType(SrcReg);
1239 unsigned SubIdx = X86::NoSubRegister;
1240
1241 // TODO: support scalar types
1242 if (!DstTy.isVector() || !SrcTy.isVector())
1243 return false;
1244
1245 assert(SrcTy.getSizeInBits() < DstTy.getSizeInBits() &&
1246 "Incorrect Src/Dst register size");
1247
1248 if (SrcTy.getSizeInBits() == 128)
1249 SubIdx = X86::sub_xmm;
1250 else if (SrcTy.getSizeInBits() == 256)
1251 SubIdx = X86::sub_ymm;
1252 else
1253 return false;
1254
1255 const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcReg, MRI);
1256 const TargetRegisterClass *DstRC = getRegClass(DstTy, DstReg, MRI);
1257
1258 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
1259 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1260 LLVM_DEBUG(dbgs() << "Failed to constrain INSERT_SUBREG\n");
1261 return false;
1262 }
1263
1264 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY))
1265 .addReg(DstReg, RegState::DefineNoRead, SubIdx)
1266 .addReg(SrcReg);
1267
1268 return true;
1269 }
1270
// Select G_INSERT of a subvector. Inserting at index 0 into an undef value
// becomes a subregister copy; otherwise VINSERT* instructions are used,
// gated on AVX/AVX512/VLX.
bool X86InstructionSelector::selectInsert(MachineInstr &I,
                                          MachineRegisterInfo &MRI,
                                          MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_INSERT) && "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();
  const Register InsertReg = I.getOperand(2).getReg();
  int64_t Index = I.getOperand(3).getImm();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT InsertRegTy = MRI.getType(InsertReg);

  // Meanwile handle vector type only.
  if (!DstTy.isVector())
    return false;

  // The bit index must be a multiple of the inserted value's width, i.e. an
  // aligned subvector insertion.
  if (Index % InsertRegTy.getSizeInBits() != 0)
    return false; // Not insert subvector.

  if (Index == 0 && MRI.getVRegDef(SrcReg)->isImplicitDef()) {
    // Replace by subreg copy.
    if (!emitInsertSubreg(DstReg, InsertReg, I, MRI, MF))
      return false;

    I.eraseFromParent();
    return true;
  }

  bool HasAVX = STI.hasAVX();
  bool HasAVX512 = STI.hasAVX512();
  bool HasVLX = STI.hasVLX();

  // Choose the VINSERT flavor by destination/inserted widths and the
  // available subtarget features.
  if (DstTy.getSizeInBits() == 256 && InsertRegTy.getSizeInBits() == 128) {
    if (HasVLX)
      I.setDesc(TII.get(X86::VINSERTF32x4Z256rr));
    else if (HasAVX)
      I.setDesc(TII.get(X86::VINSERTF128rr));
    else
      return false;
  } else if (DstTy.getSizeInBits() == 512 && HasAVX512) {
    if (InsertRegTy.getSizeInBits() == 128)
      I.setDesc(TII.get(X86::VINSERTF32x4Zrr));
    else if (InsertRegTy.getSizeInBits() == 256)
      I.setDesc(TII.get(X86::VINSERTF64x4Zrr));
    else
      return false;
  } else
    return false;

  // Convert to X86 VINSERT immediate: a subvector index, not a bit offset.
  Index = Index / InsertRegTy.getSizeInBits();

  I.getOperand(3).setImm(Index);

  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
1328
selectUnmergeValues(MachineInstr & I,MachineRegisterInfo & MRI,MachineFunction & MF)1329 bool X86InstructionSelector::selectUnmergeValues(
1330 MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) {
1331 assert((I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES) &&
1332 "unexpected instruction");
1333
1334 // Split to extracts.
1335 unsigned NumDefs = I.getNumOperands() - 1;
1336 Register SrcReg = I.getOperand(NumDefs).getReg();
1337 unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();
1338
1339 for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
1340 MachineInstr &ExtrInst =
1341 *BuildMI(*I.getParent(), I, I.getDebugLoc(),
1342 TII.get(TargetOpcode::G_EXTRACT), I.getOperand(Idx).getReg())
1343 .addReg(SrcReg)
1344 .addImm(Idx * DefSize);
1345
1346 if (!select(ExtrInst))
1347 return false;
1348 }
1349
1350 I.eraseFromParent();
1351 return true;
1352 }
1353
// Select G_MERGE_VALUES / G_CONCAT_VECTORS by chaining G_INSERTs of each
// source into progressively wider temporaries, then copying the result
// into the destination.
bool X86InstructionSelector::selectMergeValues(
    MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) {
  assert((I.getOpcode() == TargetOpcode::G_MERGE_VALUES ||
          I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS) &&
         "unexpected instruction");

  // Split to inserts.
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg0 = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg0);
  unsigned SrcSize = SrcTy.getSizeInBits();

  const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI);

  // For the first src use insertSubReg.
  Register DefReg = MRI.createGenericVirtualRegister(DstTy);
  MRI.setRegBank(DefReg, RegBank);
  if (!emitInsertSubreg(DefReg, I.getOperand(1).getReg(), I, MRI, MF))
    return false;

  // Insert each remaining source at its bit offset, selecting every
  // generated G_INSERT immediately.
  for (unsigned Idx = 2; Idx < I.getNumOperands(); ++Idx) {
    Register Tmp = MRI.createGenericVirtualRegister(DstTy);
    MRI.setRegBank(Tmp, RegBank);

    MachineInstr &InsertInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                        TII.get(TargetOpcode::G_INSERT), Tmp)
                                    .addReg(DefReg)
                                    .addReg(I.getOperand(Idx).getReg())
                                    .addImm((Idx - 1) * SrcSize);

    DefReg = Tmp;

    if (!select(InsertInst))
      return false;
  }

  // Forward the accumulated value into the original destination register.
  MachineInstr &CopyInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                    TII.get(TargetOpcode::COPY), DstReg)
                               .addReg(DefReg);

  if (!select(CopyInst))
    return false;

  I.eraseFromParent();
  return true;
}
1402
selectCondBranch(MachineInstr & I,MachineRegisterInfo & MRI,MachineFunction & MF) const1403 bool X86InstructionSelector::selectCondBranch(MachineInstr &I,
1404 MachineRegisterInfo &MRI,
1405 MachineFunction &MF) const {
1406 assert((I.getOpcode() == TargetOpcode::G_BRCOND) && "unexpected instruction");
1407
1408 const Register CondReg = I.getOperand(0).getReg();
1409 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1410
1411 MachineInstr &TestInst =
1412 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::TEST8ri))
1413 .addReg(CondReg)
1414 .addImm(1);
1415 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::JCC_1))
1416 .addMBB(DestMBB).addImm(X86::COND_NE);
1417
1418 constrainSelectedInstRegOperands(TestInst, TII, TRI, RBI);
1419
1420 I.eraseFromParent();
1421 return true;
1422 }
1423
materializeFP(MachineInstr & I,MachineRegisterInfo & MRI,MachineFunction & MF) const1424 bool X86InstructionSelector::materializeFP(MachineInstr &I,
1425 MachineRegisterInfo &MRI,
1426 MachineFunction &MF) const {
1427 assert((I.getOpcode() == TargetOpcode::G_FCONSTANT) &&
1428 "unexpected instruction");
1429
1430 // Can't handle alternate code models yet.
1431 CodeModel::Model CM = TM.getCodeModel();
1432 if (CM != CodeModel::Small && CM != CodeModel::Large)
1433 return false;
1434
1435 const Register DstReg = I.getOperand(0).getReg();
1436 const LLT DstTy = MRI.getType(DstReg);
1437 const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI);
1438 unsigned Align = DstTy.getSizeInBits();
1439 const DebugLoc &DbgLoc = I.getDebugLoc();
1440
1441 unsigned Opc = getLoadStoreOp(DstTy, RegBank, TargetOpcode::G_LOAD, Align);
1442
1443 // Create the load from the constant pool.
1444 const ConstantFP *CFP = I.getOperand(1).getFPImm();
1445 unsigned CPI = MF.getConstantPool()->getConstantPoolIndex(CFP, Align);
1446 MachineInstr *LoadInst = nullptr;
1447 unsigned char OpFlag = STI.classifyLocalReference(nullptr);
1448
1449 if (CM == CodeModel::Large && STI.is64Bit()) {
1450 // Under X86-64 non-small code model, GV (and friends) are 64-bits, so
1451 // they cannot be folded into immediate fields.
1452
1453 Register AddrReg = MRI.createVirtualRegister(&X86::GR64RegClass);
1454 BuildMI(*I.getParent(), I, DbgLoc, TII.get(X86::MOV64ri), AddrReg)
1455 .addConstantPoolIndex(CPI, 0, OpFlag);
1456
1457 MachineMemOperand *MMO = MF.getMachineMemOperand(
1458 MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
1459 MF.getDataLayout().getPointerSize(), Align);
1460
1461 LoadInst =
1462 addDirectMem(BuildMI(*I.getParent(), I, DbgLoc, TII.get(Opc), DstReg),
1463 AddrReg)
1464 .addMemOperand(MMO);
1465
1466 } else if (CM == CodeModel::Small || !STI.is64Bit()) {
1467 // Handle the case when globals fit in our immediate field.
1468 // This is true for X86-32 always and X86-64 when in -mcmodel=small mode.
1469
1470 // x86-32 PIC requires a PIC base register for constant pools.
1471 unsigned PICBase = 0;
1472 if (OpFlag == X86II::MO_PIC_BASE_OFFSET || OpFlag == X86II::MO_GOTOFF) {
1473 // PICBase can be allocated by TII.getGlobalBaseReg(&MF).
1474 // In DAGISEL the code that initialize it generated by the CGBR pass.
1475 return false; // TODO support the mode.
1476 } else if (STI.is64Bit() && TM.getCodeModel() == CodeModel::Small)
1477 PICBase = X86::RIP;
1478
1479 LoadInst = addConstantPoolReference(
1480 BuildMI(*I.getParent(), I, DbgLoc, TII.get(Opc), DstReg), CPI, PICBase,
1481 OpFlag);
1482 } else
1483 return false;
1484
1485 constrainSelectedInstRegOperands(*LoadInst, TII, TRI, RBI);
1486 I.eraseFromParent();
1487 return true;
1488 }
1489
selectImplicitDefOrPHI(MachineInstr & I,MachineRegisterInfo & MRI) const1490 bool X86InstructionSelector::selectImplicitDefOrPHI(
1491 MachineInstr &I, MachineRegisterInfo &MRI) const {
1492 assert((I.getOpcode() == TargetOpcode::G_IMPLICIT_DEF ||
1493 I.getOpcode() == TargetOpcode::G_PHI) &&
1494 "unexpected instruction");
1495
1496 Register DstReg = I.getOperand(0).getReg();
1497
1498 if (!MRI.getRegClassOrNull(DstReg)) {
1499 const LLT DstTy = MRI.getType(DstReg);
1500 const TargetRegisterClass *RC = getRegClass(DstTy, DstReg, MRI);
1501
1502 if (!RBI.constrainGenericRegister(DstReg, *RC, MRI)) {
1503 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
1504 << " operand\n");
1505 return false;
1506 }
1507 }
1508
1509 if (I.getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
1510 I.setDesc(TII.get(X86::IMPLICIT_DEF));
1511 else
1512 I.setDesc(TII.get(X86::PHI));
1513
1514 return true;
1515 }
1516
selectDivRem(MachineInstr & I,MachineRegisterInfo & MRI,MachineFunction & MF) const1517 bool X86InstructionSelector::selectDivRem(MachineInstr &I,
1518 MachineRegisterInfo &MRI,
1519 MachineFunction &MF) const {
1520 // The implementation of this function is taken from X86FastISel.
1521 assert((I.getOpcode() == TargetOpcode::G_SDIV ||
1522 I.getOpcode() == TargetOpcode::G_SREM ||
1523 I.getOpcode() == TargetOpcode::G_UDIV ||
1524 I.getOpcode() == TargetOpcode::G_UREM) &&
1525 "unexpected instruction");
1526
1527 const Register DstReg = I.getOperand(0).getReg();
1528 const Register Op1Reg = I.getOperand(1).getReg();
1529 const Register Op2Reg = I.getOperand(2).getReg();
1530
1531 const LLT RegTy = MRI.getType(DstReg);
1532 assert(RegTy == MRI.getType(Op1Reg) && RegTy == MRI.getType(Op2Reg) &&
1533 "Arguments and return value types must match");
1534
1535 const RegisterBank *RegRB = RBI.getRegBank(DstReg, MRI, TRI);
1536 if (!RegRB || RegRB->getID() != X86::GPRRegBankID)
1537 return false;
1538
1539 const static unsigned NumTypes = 4; // i8, i16, i32, i64
1540 const static unsigned NumOps = 4; // SDiv, SRem, UDiv, URem
1541 const static bool S = true; // IsSigned
1542 const static bool U = false; // !IsSigned
1543 const static unsigned Copy = TargetOpcode::COPY;
1544 // For the X86 IDIV instruction, in most cases the dividend
1545 // (numerator) must be in a specific register pair highreg:lowreg,
1546 // producing the quotient in lowreg and the remainder in highreg.
1547 // For most data types, to set up the instruction, the dividend is
1548 // copied into lowreg, and lowreg is sign-extended into highreg. The
1549 // exception is i8, where the dividend is defined as a single register rather
1550 // than a register pair, and we therefore directly sign-extend the dividend
1551 // into lowreg, instead of copying, and ignore the highreg.
1552 const static struct DivRemEntry {
1553 // The following portion depends only on the data type.
1554 unsigned SizeInBits;
1555 unsigned LowInReg; // low part of the register pair
1556 unsigned HighInReg; // high part of the register pair
1557 // The following portion depends on both the data type and the operation.
1558 struct DivRemResult {
1559 unsigned OpDivRem; // The specific DIV/IDIV opcode to use.
1560 unsigned OpSignExtend; // Opcode for sign-extending lowreg into
1561 // highreg, or copying a zero into highreg.
1562 unsigned OpCopy; // Opcode for copying dividend into lowreg, or
1563 // zero/sign-extending into lowreg for i8.
1564 unsigned DivRemResultReg; // Register containing the desired result.
1565 bool IsOpSigned; // Whether to use signed or unsigned form.
1566 } ResultTable[NumOps];
1567 } OpTable[NumTypes] = {
1568 {8,
1569 X86::AX,
1570 0,
1571 {
1572 {X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S}, // SDiv
1573 {X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S}, // SRem
1574 {X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U}, // UDiv
1575 {X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U}, // URem
1576 }}, // i8
1577 {16,
1578 X86::AX,
1579 X86::DX,
1580 {
1581 {X86::IDIV16r, X86::CWD, Copy, X86::AX, S}, // SDiv
1582 {X86::IDIV16r, X86::CWD, Copy, X86::DX, S}, // SRem
1583 {X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U}, // UDiv
1584 {X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U}, // URem
1585 }}, // i16
1586 {32,
1587 X86::EAX,
1588 X86::EDX,
1589 {
1590 {X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S}, // SDiv
1591 {X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S}, // SRem
1592 {X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U}, // UDiv
1593 {X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U}, // URem
1594 }}, // i32
1595 {64,
1596 X86::RAX,
1597 X86::RDX,
1598 {
1599 {X86::IDIV64r, X86::CQO, Copy, X86::RAX, S}, // SDiv
1600 {X86::IDIV64r, X86::CQO, Copy, X86::RDX, S}, // SRem
1601 {X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U}, // UDiv
1602 {X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U}, // URem
1603 }}, // i64
1604 };
1605
1606 auto OpEntryIt = std::find_if(std::begin(OpTable), std::end(OpTable),
1607 [RegTy](const DivRemEntry &El) {
1608 return El.SizeInBits == RegTy.getSizeInBits();
1609 });
1610 if (OpEntryIt == std::end(OpTable))
1611 return false;
1612
1613 unsigned OpIndex;
1614 switch (I.getOpcode()) {
1615 default:
1616 llvm_unreachable("Unexpected div/rem opcode");
1617 case TargetOpcode::G_SDIV:
1618 OpIndex = 0;
1619 break;
1620 case TargetOpcode::G_SREM:
1621 OpIndex = 1;
1622 break;
1623 case TargetOpcode::G_UDIV:
1624 OpIndex = 2;
1625 break;
1626 case TargetOpcode::G_UREM:
1627 OpIndex = 3;
1628 break;
1629 }
1630
1631 const DivRemEntry &TypeEntry = *OpEntryIt;
1632 const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
1633
1634 const TargetRegisterClass *RegRC = getRegClass(RegTy, *RegRB);
1635 if (!RBI.constrainGenericRegister(Op1Reg, *RegRC, MRI) ||
1636 !RBI.constrainGenericRegister(Op2Reg, *RegRC, MRI) ||
1637 !RBI.constrainGenericRegister(DstReg, *RegRC, MRI)) {
1638 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
1639 << " operand\n");
1640 return false;
1641 }
1642
1643 // Move op1 into low-order input register.
1644 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpEntry.OpCopy),
1645 TypeEntry.LowInReg)
1646 .addReg(Op1Reg);
1647 // Zero-extend or sign-extend into high-order input register.
1648 if (OpEntry.OpSignExtend) {
1649 if (OpEntry.IsOpSigned)
1650 BuildMI(*I.getParent(), I, I.getDebugLoc(),
1651 TII.get(OpEntry.OpSignExtend));
1652 else {
1653 Register Zero32 = MRI.createVirtualRegister(&X86::GR32RegClass);
1654 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::MOV32r0),
1655 Zero32);
1656
1657 // Copy the zero into the appropriate sub/super/identical physical
1658 // register. Unfortunately the operations needed are not uniform enough
1659 // to fit neatly into the table above.
1660 if (RegTy.getSizeInBits() == 16) {
1661 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy),
1662 TypeEntry.HighInReg)
1663 .addReg(Zero32, 0, X86::sub_16bit);
1664 } else if (RegTy.getSizeInBits() == 32) {
1665 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy),
1666 TypeEntry.HighInReg)
1667 .addReg(Zero32);
1668 } else if (RegTy.getSizeInBits() == 64) {
1669 BuildMI(*I.getParent(), I, I.getDebugLoc(),
1670 TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
1671 .addImm(0)
1672 .addReg(Zero32)
1673 .addImm(X86::sub_32bit);
1674 }
1675 }
1676 }
1677 // Generate the DIV/IDIV instruction.
1678 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpEntry.OpDivRem))
1679 .addReg(Op2Reg);
1680 // For i8 remainder, we can't reference ah directly, as we'll end
1681 // up with bogus copies like %r9b = COPY %ah. Reference ax
1682 // instead to prevent ah references in a rex instruction.
1683 //
1684 // The current assumption of the fast register allocator is that isel
1685 // won't generate explicit references to the GR8_NOREX registers. If
1686 // the allocator and/or the backend get enhanced to be more robust in
1687 // that regard, this can be, and should be, removed.
1688 if ((I.getOpcode() == Instruction::SRem ||
1689 I.getOpcode() == Instruction::URem) &&
1690 OpEntry.DivRemResultReg == X86::AH && STI.is64Bit()) {
1691 Register SourceSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass);
1692 Register ResultSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass);
1693 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy), SourceSuperReg)
1694 .addReg(X86::AX);
1695
1696 // Shift AX right by 8 bits instead of using AH.
1697 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::SHR16ri),
1698 ResultSuperReg)
1699 .addReg(SourceSuperReg)
1700 .addImm(8);
1701
1702 // Now reference the 8-bit subreg of the result.
1703 BuildMI(*I.getParent(), I, I.getDebugLoc(),
1704 TII.get(TargetOpcode::SUBREG_TO_REG))
1705 .addDef(DstReg)
1706 .addImm(0)
1707 .addReg(ResultSuperReg)
1708 .addImm(X86::sub_8bit);
1709 } else {
1710 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY),
1711 DstReg)
1712 .addReg(OpEntry.DivRemResultReg);
1713 }
1714 I.eraseFromParent();
1715 return true;
1716 }
1717
// Select G_INTRINSIC_W_SIDE_EFFECTS. Only llvm.trap is supported: it is
// lowered to the x86 TRAP (ud2) instruction.
bool X86InstructionSelector::selectIntrinsicWSideEffects(
    MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const {

  assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS &&
         "unexpected instruction");

  // Bail out on any intrinsic other than llvm.trap.
  if (I.getOperand(0).getIntrinsicID() != Intrinsic::trap)
    return false;

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::TRAP));

  I.eraseFromParent();
  return true;
}
1732
// Factory entry point used by the X86 target to plug this selector into the
// GlobalISel pipeline; the caller owns the returned object.
InstructionSelector *
llvm::createX86InstructionSelector(const X86TargetMachine &TM,
                                   X86Subtarget &Subtarget,
                                   X86RegisterBankInfo &RBI) {
  return new X86InstructionSelector(TM, Subtarget, RBI);
}
1739