//===- X86InstructionSelector.cpp -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// X86.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86RegisterBankInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LowLevelTypeImpl.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <tuple>

#define DEBUG_TYPE "X86-isel"

using namespace llvm;

namespace {

#define GET_GLOBALISEL_PREDICATE_BITSET
#include "X86GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET

class X86InstructionSelector : public InstructionSelector {
public:
  X86InstructionSelector(const X86TargetMachine &TM, const X86Subtarget &STI,
                         const X86RegisterBankInfo &RBI);

  bool select(MachineInstr &I) override;
  static const char *getName() { return DEBUG_TYPE; }

private:
  /// tblgen-erated 'select' implementation, used as the initial selector for
  /// the patterns that don't require complex C++.
  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;

  // TODO: remove after supported by Tablegen-erated instruction selection.
  unsigned getLoadStoreOp(const LLT &Ty, const RegisterBank &RB, unsigned Opc,
                          uint64_t Alignment) const;

  bool selectLoadStoreOp(MachineInstr &I, MachineRegisterInfo &MRI,
                         MachineFunction &MF) const;
  bool selectFrameIndexOrGep(MachineInstr &I, MachineRegisterInfo &MRI,
                             MachineFunction &MF) const;
  bool selectGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI,
                         MachineFunction &MF) const;
  bool selectConstant(MachineInstr &I, MachineRegisterInfo &MRI,
                      MachineFunction &MF) const;
  bool selectTruncOrPtrToInt(MachineInstr &I, MachineRegisterInfo &MRI,
                             MachineFunction &MF) const;
  bool selectZext(MachineInstr &I, MachineRegisterInfo &MRI,
                  MachineFunction &MF) const;
  bool selectAnyext(MachineInstr &I, MachineRegisterInfo &MRI,
                    MachineFunction &MF) const;
  bool selectCmp(MachineInstr &I, MachineRegisterInfo &MRI,
                 MachineFunction &MF) const;
  bool selectFCmp(MachineInstr &I, MachineRegisterInfo &MRI,
                  MachineFunction &MF) const;
  bool selectUadde(MachineInstr &I, MachineRegisterInfo &MRI,
                   MachineFunction &MF) const;
  bool selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI,
                           MachineFunction &MF);
  bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI,
                         MachineFunction &MF);
  bool selectInsert(MachineInstr &I, MachineRegisterInfo &MRI,
                    MachineFunction &MF) const;
  bool selectExtract(MachineInstr &I, MachineRegisterInfo &MRI,
                     MachineFunction &MF) const;
  bool selectCondBranch(MachineInstr &I, MachineRegisterInfo &MRI,
                        MachineFunction &MF) const;
  bool selectTurnIntoCOPY(MachineInstr &I, MachineRegisterInfo &MRI,
                          const unsigned DstReg,
                          const TargetRegisterClass *DstRC,
                          const unsigned SrcReg,
                          const TargetRegisterClass *SrcRC) const;
  bool materializeFP(MachineInstr &I, MachineRegisterInfo &MRI,
                     MachineFunction &MF) const;
  bool selectImplicitDefOrPHI(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectDivRem(MachineInstr &I, MachineRegisterInfo &MRI,
                    MachineFunction &MF) const;
  bool selectIntrinsicWSideEffects(MachineInstr &I, MachineRegisterInfo &MRI,
                                   MachineFunction &MF) const;

  // Emit an insert-subreg instruction and insert it before MachineInstr &I.
  bool emitInsertSubreg(unsigned DstReg, unsigned SrcReg, MachineInstr &I,
                        MachineRegisterInfo &MRI, MachineFunction &MF) const;
  // Emit an extract-subreg instruction and insert it before MachineInstr &I.
  bool emitExtractSubreg(unsigned DstReg, unsigned SrcReg, MachineInstr &I,
                         MachineRegisterInfo &MRI, MachineFunction &MF) const;

  const TargetRegisterClass *getRegClass(LLT Ty, const RegisterBank &RB) const;
  const TargetRegisterClass *getRegClass(LLT Ty, unsigned Reg,
                                         MachineRegisterInfo &MRI) const;

  const X86TargetMachine &TM;
  const X86Subtarget &STI;
  const X86InstrInfo &TII;
  const X86RegisterInfo &TRI;
  const X86RegisterBankInfo &RBI;

#define GET_GLOBALISEL_PREDICATES_DECL
#include "X86GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "X86GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL
};

} // end anonymous namespace

#define GET_GLOBALISEL_IMPL
#include "X86GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

X86InstructionSelector::X86InstructionSelector(const X86TargetMachine &TM,
                                               const X86Subtarget &STI,
                                               const X86RegisterBankInfo &RBI)
    : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "X86GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "X86GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

// FIXME: This should be target-independent, inferred from the types declared
// for each class in the bank.
const TargetRegisterClass *
X86InstructionSelector::getRegClass(LLT Ty, const RegisterBank &RB) const {
  if (RB.getID() == X86::GPRRegBankID) {
    if (Ty.getSizeInBits() <= 8)
      return &X86::GR8RegClass;
    if (Ty.getSizeInBits() == 16)
      return &X86::GR16RegClass;
    if (Ty.getSizeInBits() == 32)
      return &X86::GR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return &X86::GR64RegClass;
  }
  if (RB.getID() == X86::VECRRegBankID) {
    if (Ty.getSizeInBits() == 32)
      return STI.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return STI.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
    if (Ty.getSizeInBits() == 128)
      return STI.hasAVX512() ? &X86::VR128XRegClass : &X86::VR128RegClass;
    if (Ty.getSizeInBits() == 256)
      return STI.hasAVX512() ? &X86::VR256XRegClass : &X86::VR256RegClass;
    if (Ty.getSizeInBits() == 512)
      return &X86::VR512RegClass;
  }

  llvm_unreachable("Unknown RegBank!");
}

const TargetRegisterClass *
X86InstructionSelector::getRegClass(LLT Ty, unsigned Reg,
                                    MachineRegisterInfo &MRI) const {
  const RegisterBank &RegBank = *RBI.getRegBank(Reg, MRI, TRI);
  return getRegClass(Ty, RegBank);
}

static unsigned getSubRegIndex(const TargetRegisterClass *RC) {
  unsigned SubIdx = X86::NoSubRegister;
  if (RC == &X86::GR32RegClass) {
    SubIdx = X86::sub_32bit;
  } else if (RC == &X86::GR16RegClass) {
    SubIdx = X86::sub_16bit;
  } else if (RC == &X86::GR8RegClass) {
    SubIdx = X86::sub_8bit;
  }

  return SubIdx;
}

static const TargetRegisterClass *getRegClassFromGRPhysReg(unsigned Reg) {
  assert(Register::isPhysicalRegister(Reg));
  if (X86::GR64RegClass.contains(Reg))
    return &X86::GR64RegClass;
  if (X86::GR32RegClass.contains(Reg))
    return &X86::GR32RegClass;
  if (X86::GR16RegClass.contains(Reg))
    return &X86::GR16RegClass;
  if (X86::GR8RegClass.contains(Reg))
    return &X86::GR8RegClass;

  llvm_unreachable("Unknown RegClass for PhysReg!");
}

// Set X86 Opcode and constrain DestReg.
bool X86InstructionSelector::selectCopy(MachineInstr &I,
                                        MachineRegisterInfo &MRI) const {
  Register DstReg = I.getOperand(0).getReg();
  const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);

  Register SrcReg = I.getOperand(1).getReg();
  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);

  if (Register::isPhysicalRegister(DstReg)) {
    assert(I.isCopy() && "Generic operators do not allow physical registers");

    if (DstSize > SrcSize && SrcRegBank.getID() == X86::GPRRegBankID &&
        DstRegBank.getID() == X86::GPRRegBankID) {

      const TargetRegisterClass *SrcRC =
          getRegClass(MRI.getType(SrcReg), SrcRegBank);
      const TargetRegisterClass *DstRC = getRegClassFromGRPhysReg(DstReg);

      if (SrcRC != DstRC) {
        // This case can be generated by ABI lowering; perform an anyext.
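        // For example (illustrative MIR, register names made up), copying a
        // 32-bit vreg into the 64-bit physical register RAX becomes:
        //   %ext:gr64 = SUBREG_TO_REG 0, %src, %subreg.sub_32bit
        //   $rax = COPY %ext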
        Register ExtSrc = MRI.createVirtualRegister(DstRC);
        BuildMI(*I.getParent(), I, I.getDebugLoc(),
                TII.get(TargetOpcode::SUBREG_TO_REG))
            .addDef(ExtSrc)
            .addImm(0)
            .addReg(SrcReg)
            .addImm(getSubRegIndex(SrcRC));

        I.getOperand(1).setReg(ExtSrc);
      }
    }

    return true;
  }

  assert((!Register::isPhysicalRegister(SrcReg) || I.isCopy()) &&
         "No phys reg on generic operators");
  assert((DstSize == SrcSize ||
          // Copies are a means to set up initial types; the number of
          // bits may not exactly match.
          (Register::isPhysicalRegister(SrcReg) &&
           DstSize <= RBI.getSizeInBits(SrcReg, MRI, TRI))) &&
         "Copy with different width?!");

  const TargetRegisterClass *DstRC =
      getRegClass(MRI.getType(DstReg), DstRegBank);

  if (SrcRegBank.getID() == X86::GPRRegBankID &&
      DstRegBank.getID() == X86::GPRRegBankID && SrcSize > DstSize &&
      Register::isPhysicalRegister(SrcReg)) {
    // Change the physical register to perform the truncate.

    const TargetRegisterClass *SrcRC = getRegClassFromGRPhysReg(SrcReg);

    if (DstRC != SrcRC) {
      I.getOperand(1).setSubReg(getSubRegIndex(DstRC));
      I.getOperand(1).substPhysReg(SrcReg, TRI);
    }
  }

  // No need to constrain SrcReg. It will get constrained when
  // we hit another of its uses or defs.
  // Copies do not have constraints.
  const TargetRegisterClass *OldRC = MRI.getRegClassOrNull(DstReg);
  if (!OldRC || !DstRC->hasSubClassEq(OldRC)) {
    if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
      LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                        << " operand\n");
      return false;
    }
  }
  I.setDesc(TII.get(X86::COPY));
  return true;
}

bool X86InstructionSelector::select(MachineInstr &I) {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");

  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  unsigned Opcode = I.getOpcode();
  if (!isPreISelGenericOpcode(Opcode)) {
    // Certain non-generic instructions also need some special handling.

    if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
      return false;

    if (I.isCopy())
      return selectCopy(I, MRI);

    return true;
  }

  assert(I.getNumOperands() == I.getNumExplicitOperands() &&
         "Generic instruction has unexpected implicit operands\n");

  if (selectImpl(I, *CoverageInfo))
    return true;

  LLVM_DEBUG(dbgs() << " C++ instruction selection: "; I.print(dbgs()));

  // TODO: This should be implemented by tblgen.
  switch (I.getOpcode()) {
  default:
    return false;
  case TargetOpcode::G_STORE:
  case TargetOpcode::G_LOAD:
    return selectLoadStoreOp(I, MRI, MF);
  case TargetOpcode::G_PTR_ADD:
  case TargetOpcode::G_FRAME_INDEX:
    return selectFrameIndexOrGep(I, MRI, MF);
  case TargetOpcode::G_GLOBAL_VALUE:
    return selectGlobalValue(I, MRI, MF);
  case TargetOpcode::G_CONSTANT:
    return selectConstant(I, MRI, MF);
  case TargetOpcode::G_FCONSTANT:
    return materializeFP(I, MRI, MF);
  case TargetOpcode::G_PTRTOINT:
  case TargetOpcode::G_TRUNC:
    return selectTruncOrPtrToInt(I, MRI, MF);
  case TargetOpcode::G_INTTOPTR:
    return selectCopy(I, MRI);
  case TargetOpcode::G_ZEXT:
    return selectZext(I, MRI, MF);
  case TargetOpcode::G_ANYEXT:
    return selectAnyext(I, MRI, MF);
  case TargetOpcode::G_ICMP:
    return selectCmp(I, MRI, MF);
  case TargetOpcode::G_FCMP:
    return selectFCmp(I, MRI, MF);
  case TargetOpcode::G_UADDE:
    return selectUadde(I, MRI, MF);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectUnmergeValues(I, MRI, MF);
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectMergeValues(I, MRI, MF);
  case TargetOpcode::G_EXTRACT:
    return selectExtract(I, MRI, MF);
  case TargetOpcode::G_INSERT:
    return selectInsert(I, MRI, MF);
  case TargetOpcode::G_BRCOND:
    return selectCondBranch(I, MRI, MF);
  case TargetOpcode::G_IMPLICIT_DEF:
  case TargetOpcode::G_PHI:
    return selectImplicitDefOrPHI(I, MRI);
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
    return selectDivRem(I, MRI, MF);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    return selectIntrinsicWSideEffects(I, MRI, MF);
  }

  return false;
}

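// Map a generic load/store to a concrete X86 opcode for the given type,
// register bank, and alignment: scalars on the GPR bank use plain MOVs,
// scalars on the vector bank use MOVSS/MOVSD variants, and vectors choose
// aligned (MOVAPS-style) vs. unaligned (MOVUPS-style) forms by alignment,
// with SSE/VEX/EVEX encodings picked from the subtarget features.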
unsigned X86InstructionSelector::getLoadStoreOp(const LLT &Ty,
                                                const RegisterBank &RB,
                                                unsigned Opc,
                                                uint64_t Alignment) const {
  bool Isload = (Opc == TargetOpcode::G_LOAD);
  bool HasAVX = STI.hasAVX();
  bool HasAVX512 = STI.hasAVX512();
  bool HasVLX = STI.hasVLX();

  if (Ty == LLT::scalar(8)) {
    if (X86::GPRRegBankID == RB.getID())
      return Isload ? X86::MOV8rm : X86::MOV8mr;
  } else if (Ty == LLT::scalar(16)) {
    if (X86::GPRRegBankID == RB.getID())
      return Isload ? X86::MOV16rm : X86::MOV16mr;
  } else if (Ty == LLT::scalar(32) || Ty == LLT::pointer(0, 32)) {
    if (X86::GPRRegBankID == RB.getID())
      return Isload ? X86::MOV32rm : X86::MOV32mr;
    if (X86::VECRRegBankID == RB.getID())
      return Isload ? (HasAVX512 ? X86::VMOVSSZrm_alt :
                       HasAVX    ? X86::VMOVSSrm_alt :
                                   X86::MOVSSrm_alt)
                    : (HasAVX512 ? X86::VMOVSSZmr :
                       HasAVX    ? X86::VMOVSSmr :
                                   X86::MOVSSmr);
  } else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) {
    if (X86::GPRRegBankID == RB.getID())
      return Isload ? X86::MOV64rm : X86::MOV64mr;
    if (X86::VECRRegBankID == RB.getID())
      return Isload ? (HasAVX512 ? X86::VMOVSDZrm_alt :
                       HasAVX    ? X86::VMOVSDrm_alt :
                                   X86::MOVSDrm_alt)
                    : (HasAVX512 ? X86::VMOVSDZmr :
                       HasAVX    ? X86::VMOVSDmr :
                                   X86::MOVSDmr);
  } else if (Ty.isVector() && Ty.getSizeInBits() == 128) {
    if (Alignment >= 16)
      return Isload ? (HasVLX ? X86::VMOVAPSZ128rm
                              : HasAVX512
                                    ? X86::VMOVAPSZ128rm_NOVLX
                                    : HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm)
                    : (HasVLX ? X86::VMOVAPSZ128mr
                              : HasAVX512
                                    ? X86::VMOVAPSZ128mr_NOVLX
                                    : HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr);
    else
      return Isload ? (HasVLX ? X86::VMOVUPSZ128rm
                              : HasAVX512
                                    ? X86::VMOVUPSZ128rm_NOVLX
                                    : HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm)
                    : (HasVLX ? X86::VMOVUPSZ128mr
                              : HasAVX512
                                    ? X86::VMOVUPSZ128mr_NOVLX
                                    : HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr);
  } else if (Ty.isVector() && Ty.getSizeInBits() == 256) {
    if (Alignment >= 32)
      return Isload ? (HasVLX ? X86::VMOVAPSZ256rm
                              : HasAVX512 ? X86::VMOVAPSZ256rm_NOVLX
                                          : X86::VMOVAPSYrm)
                    : (HasVLX ? X86::VMOVAPSZ256mr
                              : HasAVX512 ? X86::VMOVAPSZ256mr_NOVLX
                                          : X86::VMOVAPSYmr);
    else
      return Isload ? (HasVLX ? X86::VMOVUPSZ256rm
                              : HasAVX512 ? X86::VMOVUPSZ256rm_NOVLX
                                          : X86::VMOVUPSYrm)
                    : (HasVLX ? X86::VMOVUPSZ256mr
                              : HasAVX512 ? X86::VMOVUPSZ256mr_NOVLX
                                          : X86::VMOVUPSYmr);
  } else if (Ty.isVector() && Ty.getSizeInBits() == 512) {
    if (Alignment >= 64)
      return Isload ? X86::VMOVAPSZrm : X86::VMOVAPSZmr;
    else
      return Isload ? X86::VMOVUPSZrm : X86::VMOVUPSZmr;
  }
  return Opc;
}

// Fill in an address from the given instruction.
static void X86SelectAddress(const MachineInstr &I,
                             const MachineRegisterInfo &MRI,
                             X86AddressMode &AM) {
  assert(I.getOperand(0).isReg() && "unsupported operand.");
  assert(MRI.getType(I.getOperand(0).getReg()).isPointer() &&
         "unsupported type.");

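  // A G_PTR_ADD whose offset is a constant that fits in a signed 32-bit
  // displacement folds straight into the addressing mode, e.g. (illustrative
  // MIR) %p = G_PTR_ADD %base, 16 is addressed as [%base + 16]. Anything
  // else falls through and uses the pointer vreg itself as the base.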
  if (I.getOpcode() == TargetOpcode::G_PTR_ADD) {
    if (auto COff = getConstantVRegVal(I.getOperand(2).getReg(), MRI)) {
      int64_t Imm = *COff;
      if (isInt<32>(Imm)) { // Check for displacement overflow.
        AM.Disp = static_cast<int32_t>(Imm);
        AM.Base.Reg = I.getOperand(1).getReg();
        return;
      }
    }
  } else if (I.getOpcode() == TargetOpcode::G_FRAME_INDEX) {
    AM.Base.FrameIndex = I.getOperand(1).getIndex();
    AM.BaseType = X86AddressMode::FrameIndexBase;
    return;
  }

  // Default behavior.
  AM.Base.Reg = I.getOperand(0).getReg();
}

bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I,
                                               MachineRegisterInfo &MRI,
                                               MachineFunction &MF) const {
  unsigned Opc = I.getOpcode();

  assert((Opc == TargetOpcode::G_STORE || Opc == TargetOpcode::G_LOAD) &&
         "unexpected instruction");

  const Register DefReg = I.getOperand(0).getReg();
  LLT Ty = MRI.getType(DefReg);
  const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);

  assert(I.hasOneMemOperand());
  auto &MemOp = **I.memoperands_begin();
  if (MemOp.isAtomic()) {
    // Note: for unordered operations, we rely on the fact the appropriate MMO
    // is already on the instruction we're mutating, and thus we don't need to
    // make any changes.  So long as we select an opcode which is capable of
    // loading or storing the appropriate size atomically, the rest of the
    // backend is required to respect the MMO state.
    if (!MemOp.isUnordered()) {
      LLVM_DEBUG(dbgs() << "Atomic ordering not supported yet\n");
      return false;
    }
    if (MemOp.getAlignment() < Ty.getSizeInBits() / 8) {
      LLVM_DEBUG(dbgs() << "Unaligned atomics not supported yet\n");
      return false;
    }
  }

  unsigned NewOpc = getLoadStoreOp(Ty, RB, Opc, MemOp.getAlignment());
  if (NewOpc == Opc)
    return false;

  X86AddressMode AM;
  X86SelectAddress(*MRI.getVRegDef(I.getOperand(1).getReg()), MRI, AM);

  I.setDesc(TII.get(NewOpc));
  MachineInstrBuilder MIB(MF, I);
  if (Opc == TargetOpcode::G_LOAD) {
    I.RemoveOperand(1);
    addFullAddress(MIB, AM);
  } else {
    // G_STORE (VAL, Addr), X86Store instruction (Addr, VAL)
    I.RemoveOperand(1);
    I.RemoveOperand(0);
    addFullAddress(MIB, AM).addUse(DefReg);
  }
  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

static unsigned getLeaOP(LLT Ty, const X86Subtarget &STI) {
  if (Ty == LLT::pointer(0, 64))
    return X86::LEA64r;
  else if (Ty == LLT::pointer(0, 32))
    return STI.isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r;
  else
    llvm_unreachable("Can't get LEA opcode. Unsupported type.");
}

bool X86InstructionSelector::selectFrameIndexOrGep(MachineInstr &I,
                                                   MachineRegisterInfo &MRI,
                                                   MachineFunction &MF) const {
  unsigned Opc = I.getOpcode();

  assert((Opc == TargetOpcode::G_FRAME_INDEX ||
          Opc == TargetOpcode::G_PTR_ADD) &&
         "unexpected instruction");

  const Register DefReg = I.getOperand(0).getReg();
  LLT Ty = MRI.getType(DefReg);

  // Use LEA to compute the frame index address or the GEP (G_PTR_ADD).
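  // The operands built below form the standard X86 memory reference
  // (Base, Scale, Index, Disp, Segment): a frame index selects to roughly
  // LEA dst, [fi + 0] and a G_PTR_ADD to LEA dst, [base + 1 * index + 0].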
  unsigned NewOpc = getLeaOP(Ty, STI);
  I.setDesc(TII.get(NewOpc));
  MachineInstrBuilder MIB(MF, I);

  if (Opc == TargetOpcode::G_FRAME_INDEX) {
    addOffset(MIB, 0);
  } else {
    MachineOperand &InxOp = I.getOperand(2);
    I.addOperand(InxOp);        // set IndexReg
    InxOp.ChangeToImmediate(1); // set Scale
    MIB.addImm(0).addReg(0);
  }

  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

bool X86InstructionSelector::selectGlobalValue(MachineInstr &I,
                                               MachineRegisterInfo &MRI,
                                               MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_GLOBAL_VALUE) &&
         "unexpected instruction");

  auto GV = I.getOperand(1).getGlobal();
  if (GV->isThreadLocal()) {
    return false; // TODO: we don't support TLS yet.
  }

  // Can't handle alternate code models yet.
  if (TM.getCodeModel() != CodeModel::Small)
    return false;

  X86AddressMode AM;
  AM.GV = GV;
  AM.GVOpFlags = STI.classifyGlobalReference(GV);

  // TODO: The ABI requires an extra load; not supported yet.
  if (isGlobalStubReference(AM.GVOpFlags))
    return false;

  // TODO: This reference is relative to the PIC base; not supported yet.
  if (isGlobalRelativeToPICBase(AM.GVOpFlags))
    return false;

  if (STI.isPICStyleRIPRel()) {
    // Use rip-relative addressing.
    assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
    AM.Base.Reg = X86::RIP;
  }

  const Register DefReg = I.getOperand(0).getReg();
  LLT Ty = MRI.getType(DefReg);
  unsigned NewOpc = getLeaOP(Ty, STI);

  I.setDesc(TII.get(NewOpc));
  MachineInstrBuilder MIB(MF, I);

  I.RemoveOperand(1);
  addFullAddress(MIB, AM);

  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

bool X86InstructionSelector::selectConstant(MachineInstr &I,
                                            MachineRegisterInfo &MRI,
                                            MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_CONSTANT) &&
         "unexpected instruction");

  const Register DefReg = I.getOperand(0).getReg();
  LLT Ty = MRI.getType(DefReg);

  if (RBI.getRegBank(DefReg, MRI, TRI)->getID() != X86::GPRRegBankID)
    return false;

  uint64_t Val = 0;
  if (I.getOperand(1).isCImm()) {
    Val = I.getOperand(1).getCImm()->getZExtValue();
    I.getOperand(1).ChangeToImmediate(Val);
  } else if (I.getOperand(1).isImm()) {
    Val = I.getOperand(1).getImm();
  } else
    llvm_unreachable("Unsupported operand type.");

  unsigned NewOpc;
  switch (Ty.getSizeInBits()) {
  case 8:
    NewOpc = X86::MOV8ri;
    break;
  case 16:
    NewOpc = X86::MOV16ri;
    break;
  case 32:
    NewOpc = X86::MOV32ri;
    break;
  case 64:
    // TODO: in case isUInt<32>(Val), X86::MOV32ri can be used
    if (isInt<32>(Val))
      NewOpc = X86::MOV64ri32;
    else
      NewOpc = X86::MOV64ri;
    break;
  default:
    llvm_unreachable("Can't select G_CONSTANT, unsupported type.");
  }

  I.setDesc(TII.get(NewOpc));
  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

// Helper function for selectTruncOrPtrToInt and selectAnyext.
// Returns true if DstRC lives on a floating register class and
// SrcRC lives on a 128-bit vector class.
static bool canTurnIntoCOPY(const TargetRegisterClass *DstRC,
                            const TargetRegisterClass *SrcRC) {
  return (DstRC == &X86::FR32RegClass || DstRC == &X86::FR32XRegClass ||
          DstRC == &X86::FR64RegClass || DstRC == &X86::FR64XRegClass) &&
         (SrcRC == &X86::VR128RegClass || SrcRC == &X86::VR128XRegClass);
}

bool X86InstructionSelector::selectTurnIntoCOPY(
    MachineInstr &I, MachineRegisterInfo &MRI, const unsigned DstReg,
    const TargetRegisterClass *DstRC, const unsigned SrcReg,
    const TargetRegisterClass *SrcRC) const {

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }
  I.setDesc(TII.get(X86::COPY));
  return true;
}

bool X86InstructionSelector::selectTruncOrPtrToInt(MachineInstr &I,
                                                   MachineRegisterInfo &MRI,
                                                   MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_TRUNC ||
          I.getOpcode() == TargetOpcode::G_PTRTOINT) &&
         "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);

  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);

  if (DstRB.getID() != SrcRB.getID()) {
    LLVM_DEBUG(dbgs() << TII.getName(I.getOpcode())
                      << " input/output on different banks\n");
    return false;
  }

  const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB);
  const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcRB);

  if (!DstRC || !SrcRC)
    return false;

  // If this is a truncation of a value that lives in a vector class and goes
  // into a floating-point class, just replace it with a copy, as we are able
  // to select it as a regular move.
  if (canTurnIntoCOPY(DstRC, SrcRC))
    return selectTurnIntoCOPY(I, MRI, DstReg, DstRC, SrcReg, SrcRC);

  if (DstRB.getID() != X86::GPRRegBankID)
    return false;

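  // A GPR-to-GPR truncate needs no instruction at all: it becomes a
  // subregister copy, e.g. (illustrative) %dst:gr8 = COPY %src.sub_8bit.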
  unsigned SubIdx;
  if (DstRC == SrcRC) {
    // Nothing to be done
    SubIdx = X86::NoSubRegister;
  } else if (DstRC == &X86::GR32RegClass) {
    SubIdx = X86::sub_32bit;
  } else if (DstRC == &X86::GR16RegClass) {
    SubIdx = X86::sub_16bit;
  } else if (DstRC == &X86::GR8RegClass) {
    SubIdx = X86::sub_8bit;
  } else {
    return false;
  }

  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubIdx);

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << "\n");
    return false;
  }

  I.getOperand(1).setSubReg(SubIdx);

  I.setDesc(TII.get(X86::COPY));
  return true;
}

bool X86InstructionSelector::selectZext(MachineInstr &I,
                                        MachineRegisterInfo &MRI,
                                        MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_ZEXT) && "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);

  assert(!(SrcTy == LLT::scalar(8) && DstTy == LLT::scalar(32)) &&
         "8=>32 Zext is handled by tablegen");
  assert(!(SrcTy == LLT::scalar(16) && DstTy == LLT::scalar(32)) &&
         "16=>32 Zext is handled by tablegen");

  static const struct ZextEntry {
    LLT SrcTy;
    LLT DstTy;
    unsigned MovOp;
    bool NeedSubregToReg;
  } OpTable[] = {
      {LLT::scalar(8), LLT::scalar(16), X86::MOVZX16rr8, false},  // i8  => i16
      {LLT::scalar(8), LLT::scalar(64), X86::MOVZX32rr8, true},   // i8  => i64
      {LLT::scalar(16), LLT::scalar(64), X86::MOVZX32rr16, true}, // i16 => i64
      {LLT::scalar(32), LLT::scalar(64), 0, true}                 // i32 => i64
  };
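  // The i8/i16 => i64 entries zero-extend into a 32-bit register first and
  // then use SUBREG_TO_REG: on x86-64, writing a 32-bit register implicitly
  // zeroes bits 63:32, so no 64-bit MOVZX is required. For i32 => i64 no
  // move is needed at all (MovOp == 0); SUBREG_TO_REG alone suffices.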

  auto ZextEntryIt =
      std::find_if(std::begin(OpTable), std::end(OpTable),
                   [SrcTy, DstTy](const ZextEntry &El) {
                     return El.DstTy == DstTy && El.SrcTy == SrcTy;
                   });

  // Here we try to select Zext into a MOVZ and/or SUBREG_TO_REG instruction.
  if (ZextEntryIt != std::end(OpTable)) {
    const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
    const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
    const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB);
    const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcRB);

    if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
        !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
      LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                        << " operand\n");
      return false;
    }

    unsigned TransitRegTo = DstReg;
    unsigned TransitRegFrom = SrcReg;
    if (ZextEntryIt->MovOp) {
      // If we select Zext into MOVZ + SUBREG_TO_REG, we need to have
      // a transit register in between: create it here.
      if (ZextEntryIt->NeedSubregToReg) {
        TransitRegFrom = MRI.createVirtualRegister(
            getRegClass(LLT::scalar(32), DstReg, MRI));
        TransitRegTo = TransitRegFrom;
      }

      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(ZextEntryIt->MovOp))
          .addDef(TransitRegTo)
          .addReg(SrcReg);
    }
    if (ZextEntryIt->NeedSubregToReg) {
      BuildMI(*I.getParent(), I, I.getDebugLoc(),
              TII.get(TargetOpcode::SUBREG_TO_REG))
          .addDef(DstReg)
          .addImm(0)
          .addReg(TransitRegFrom)
          .addImm(X86::sub_32bit);
    }
    I.eraseFromParent();
    return true;
  }

  if (SrcTy != LLT::scalar(1))
    return false;

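  // From here on the source is s1: widen the 8-bit register holding the
  // boolean to the destination width if needed, then mask it down to bit 0
  // with an AND of immediate 1.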
  unsigned AndOpc;
  if (DstTy == LLT::scalar(8))
    AndOpc = X86::AND8ri;
  else if (DstTy == LLT::scalar(16))
    AndOpc = X86::AND16ri8;
  else if (DstTy == LLT::scalar(32))
    AndOpc = X86::AND32ri8;
  else if (DstTy == LLT::scalar(64))
    AndOpc = X86::AND64ri8;
  else
    return false;

  unsigned DefReg = SrcReg;
  if (DstTy != LLT::scalar(8)) {
    DefReg = MRI.createVirtualRegister(getRegClass(DstTy, DstReg, MRI));
    BuildMI(*I.getParent(), I, I.getDebugLoc(),
            TII.get(TargetOpcode::SUBREG_TO_REG), DefReg)
        .addImm(0)
        .addReg(SrcReg)
        .addImm(X86::sub_8bit);
  }

  MachineInstr &AndInst =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AndOpc), DstReg)
           .addReg(DefReg)
           .addImm(1);

  constrainSelectedInstRegOperands(AndInst, TII, TRI, RBI);

  I.eraseFromParent();
  return true;
}

bool X86InstructionSelector::selectAnyext(MachineInstr &I,
                                          MachineRegisterInfo &MRI,
                                          MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_ANYEXT) && "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);

  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);

  assert(DstRB.getID() == SrcRB.getID() &&
         "G_ANYEXT input/output on different banks\n");

  assert(DstTy.getSizeInBits() > SrcTy.getSizeInBits() &&
         "G_ANYEXT incorrect operand size");

  const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB);
  const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcRB);

  // If this is an ANY_EXT of a value that lives in a floating-point class and
  // goes into a vector class, just replace it with a copy, as we are able to
  // select it as a regular move.
  if (canTurnIntoCOPY(SrcRC, DstRC))
    return selectTurnIntoCOPY(I, MRI, SrcReg, SrcRC, DstReg, DstRC);

  if (DstRB.getID() != X86::GPRRegBankID)
    return false;

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }

  if (SrcRC == DstRC) {
    I.setDesc(TII.get(X86::COPY));
    return true;
  }

  BuildMI(*I.getParent(), I, I.getDebugLoc(),
          TII.get(TargetOpcode::SUBREG_TO_REG))
      .addDef(DstReg)
      .addImm(0)
      .addReg(SrcReg)
      .addImm(getSubRegIndex(SrcRC));

  I.eraseFromParent();
  return true;
}

bool X86InstructionSelector::selectCmp(MachineInstr &I,
                                       MachineRegisterInfo &MRI,
                                       MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_ICMP) && "unexpected instruction");

  X86::CondCode CC;
  bool SwapArgs;
  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(
      (CmpInst::Predicate)I.getOperand(1).getPredicate());

  Register LHS = I.getOperand(2).getReg();
  Register RHS = I.getOperand(3).getReg();

  if (SwapArgs)
    std::swap(LHS, RHS);

  unsigned OpCmp;
  LLT Ty = MRI.getType(LHS);

  switch (Ty.getSizeInBits()) {
  default:
    return false;
  case 8:
    OpCmp = X86::CMP8rr;
    break;
  case 16:
    OpCmp = X86::CMP16rr;
    break;
  case 32:
    OpCmp = X86::CMP32rr;
    break;
  case 64:
    OpCmp = X86::CMP64rr;
    break;
  }

  MachineInstr &CmpInst =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpCmp))
           .addReg(LHS)
           .addReg(RHS);

  MachineInstr &SetInst =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::SETCCr),
               I.getOperand(0).getReg())
           .addImm(CC);

  constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI);
  constrainSelectedInstRegOperands(SetInst, TII, TRI, RBI);

  I.eraseFromParent();
  return true;
}

bool X86InstructionSelector::selectFCmp(MachineInstr &I,
                                        MachineRegisterInfo &MRI,
                                        MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_FCMP) && "unexpected instruction");

  Register LhsReg = I.getOperand(2).getReg();
  Register RhsReg = I.getOperand(3).getReg();
  CmpInst::Predicate Predicate =
      (CmpInst::Predicate)I.getOperand(1).getPredicate();

  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
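  // UCOMISS/UCOMISD set ZF, PF and CF, and an unordered result sets all
  // three. OEQ therefore needs "equal and ordered" (SETE AND SETNP), while
  // UNE needs "not equal or unordered" (SETNE OR SETP); the table below
  // pairs the two condition codes with the instruction that combines them.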
  static const uint16_t SETFOpcTable[2][3] = {
      {X86::COND_E, X86::COND_NP, X86::AND8rr},
      {X86::COND_NE, X86::COND_P, X86::OR8rr}};
  const uint16_t *SETFOpc = nullptr;
  switch (Predicate) {
  default:
    break;
  case CmpInst::FCMP_OEQ:
    SETFOpc = &SETFOpcTable[0][0];
    break;
  case CmpInst::FCMP_UNE:
    SETFOpc = &SETFOpcTable[1][0];
    break;
  }

  // Compute the opcode for the CMP instruction.
  unsigned OpCmp;
  LLT Ty = MRI.getType(LhsReg);
  switch (Ty.getSizeInBits()) {
  default:
    return false;
  case 32:
    OpCmp = X86::UCOMISSrr;
    break;
  case 64:
    OpCmp = X86::UCOMISDrr;
    break;
  }

  Register ResultReg = I.getOperand(0).getReg();
  RBI.constrainGenericRegister(
      ResultReg,
      *getRegClass(LLT::scalar(8), *RBI.getRegBank(ResultReg, MRI, TRI)), MRI);
  if (SETFOpc) {
    MachineInstr &CmpInst =
        *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpCmp))
             .addReg(LhsReg)
             .addReg(RhsReg);

    Register FlagReg1 = MRI.createVirtualRegister(&X86::GR8RegClass);
    Register FlagReg2 = MRI.createVirtualRegister(&X86::GR8RegClass);
    MachineInstr &Set1 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                  TII.get(X86::SETCCr), FlagReg1)
                              .addImm(SETFOpc[0]);
    MachineInstr &Set2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                  TII.get(X86::SETCCr), FlagReg2)
                              .addImm(SETFOpc[1]);
    MachineInstr &Set3 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                  TII.get(SETFOpc[2]), ResultReg)
                              .addReg(FlagReg1)
                              .addReg(FlagReg2);
    constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI);
    constrainSelectedInstRegOperands(Set1, TII, TRI, RBI);
    constrainSelectedInstRegOperands(Set2, TII, TRI, RBI);
    constrainSelectedInstRegOperands(Set3, TII, TRI, RBI);

    I.eraseFromParent();
    return true;
  }

  X86::CondCode CC;
  bool SwapArgs;
  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");

  if (SwapArgs)
    std::swap(LhsReg, RhsReg);

  // Emit a compare of LHS/RHS.
  MachineInstr &CmpInst =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpCmp))
           .addReg(LhsReg)
           .addReg(RhsReg);

  MachineInstr &Set =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::SETCCr),
               ResultReg)
           .addImm(CC);
  constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI);
  constrainSelectedInstRegOperands(Set, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

bool X86InstructionSelector::selectUadde(MachineInstr &I,
                                         MachineRegisterInfo &MRI,
                                         MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_UADDE) && "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register CarryOutReg = I.getOperand(1).getReg();
  const Register Op0Reg = I.getOperand(2).getReg();
  const Register Op1Reg = I.getOperand(3).getReg();
  Register CarryInReg = I.getOperand(4).getReg();

  const LLT DstTy = MRI.getType(DstReg);

  if (DstTy != LLT::scalar(32))
    return false;

  // Find the CarryIn def instruction.
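  // The carry may reach us as the G_TRUNC of a wider value, so look through
  // any trunc chain to the instruction that actually produces the carry.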
  MachineInstr *Def = MRI.getVRegDef(CarryInReg);
  while (Def->getOpcode() == TargetOpcode::G_TRUNC) {
    CarryInReg = Def->getOperand(1).getReg();
    Def = MRI.getVRegDef(CarryInReg);
  }

  unsigned Opcode;
  if (Def->getOpcode() == TargetOpcode::G_UADDE) {
    // The carry was set by a previous ADD; copy it into EFLAGS and use ADC.

    BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), X86::EFLAGS)
        .addReg(CarryInReg);

    if (!RBI.constrainGenericRegister(CarryInReg, X86::GR32RegClass, MRI))
      return false;

    Opcode = X86::ADC32rr;
  } else if (auto val = getConstantVRegVal(CarryInReg, MRI)) {
    // The carry is a constant; only 0 is supported.
    if (*val != 0)
      return false;

    Opcode = X86::ADD32rr;
  } else
    return false;

  MachineInstr &AddInst =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opcode), DstReg)
           .addReg(Op0Reg)
           .addReg(Op1Reg);

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), CarryOutReg)
      .addReg(X86::EFLAGS);

  if (!constrainSelectedInstRegOperands(AddInst, TII, TRI, RBI) ||
      !RBI.constrainGenericRegister(CarryOutReg, X86::GR32RegClass, MRI))
    return false;

  I.eraseFromParent();
  return true;
}

bool X86InstructionSelector::selectExtract(MachineInstr &I,
                                           MachineRegisterInfo &MRI,
                                           MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_EXTRACT) &&
         "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();
  int64_t Index = I.getOperand(2).getImm();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);

  // For now, handle vector types only.
  if (!DstTy.isVector())
    return false;

  if (Index % DstTy.getSizeInBits() != 0)
    return false; // Not a subvector extract.

  if (Index == 0) {
    // Replace with an extract-subreg copy.
    if (!emitExtractSubreg(DstReg, SrcReg, I, MRI, MF))
      return false;

    I.eraseFromParent();
    return true;
  }

  bool HasAVX = STI.hasAVX();
  bool HasAVX512 = STI.hasAVX512();
  bool HasVLX = STI.hasVLX();

  if (SrcTy.getSizeInBits() == 256 && DstTy.getSizeInBits() == 128) {
    if (HasVLX)
      I.setDesc(TII.get(X86::VEXTRACTF32x4Z256rr));
    else if (HasAVX)
      I.setDesc(TII.get(X86::VEXTRACTF128rr));
    else
      return false;
  } else if (SrcTy.getSizeInBits() == 512 && HasAVX512) {
    if (DstTy.getSizeInBits() == 128)
      I.setDesc(TII.get(X86::VEXTRACTF32x4Zrr));
    else if (DstTy.getSizeInBits() == 256)
      I.setDesc(TII.get(X86::VEXTRACTF64x4Zrr));
    else
      return false;
  } else
    return false;

  // Convert to X86 VEXTRACT immediate.
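  // The bit offset becomes a lane index, e.g. extracting the upper half of a
  // 256-bit vector (Index == 128, 128-bit destination) yields immediate 1.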
  Index = Index / DstTy.getSizeInBits();
  I.getOperand(2).setImm(Index);

  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

bool X86InstructionSelector::emitExtractSubreg(unsigned DstReg, unsigned SrcReg,
                                               MachineInstr &I,
                                               MachineRegisterInfo &MRI,
                                               MachineFunction &MF) const {
  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  unsigned SubIdx = X86::NoSubRegister;

  if (!DstTy.isVector() || !SrcTy.isVector())
    return false;

  assert(SrcTy.getSizeInBits() > DstTy.getSizeInBits() &&
         "Incorrect Src/Dst register size");

  if (DstTy.getSizeInBits() == 128)
    SubIdx = X86::sub_xmm;
  else if (DstTy.getSizeInBits() == 256)
    SubIdx = X86::sub_ymm;
  else
    return false;

  const TargetRegisterClass *DstRC = getRegClass(DstTy, DstReg, MRI);
  const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcReg, MRI);

  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubIdx);

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain EXTRACT_SUBREG\n");
    return false;
  }

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), DstReg)
      .addReg(SrcReg, 0, SubIdx);

  return true;
}

bool X86InstructionSelector::emitInsertSubreg(unsigned DstReg, unsigned SrcReg,
                                              MachineInstr &I,
                                              MachineRegisterInfo &MRI,
                                              MachineFunction &MF) const {
  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  unsigned SubIdx = X86::NoSubRegister;

  // TODO: support scalar types
  if (!DstTy.isVector() || !SrcTy.isVector())
    return false;

  assert(SrcTy.getSizeInBits() < DstTy.getSizeInBits() &&
         "Incorrect Src/Dst register size");

  if (SrcTy.getSizeInBits() == 128)
    SubIdx = X86::sub_xmm;
  else if (SrcTy.getSizeInBits() == 256)
    SubIdx = X86::sub_ymm;
  else
    return false;

  const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcReg, MRI);
  const TargetRegisterClass *DstRC = getRegClass(DstTy, DstReg, MRI);

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain INSERT_SUBREG\n");
    return false;
  }

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY))
      .addReg(DstReg, RegState::DefineNoRead, SubIdx)
      .addReg(SrcReg);

  return true;
}

bool X86InstructionSelector::selectInsert(MachineInstr &I,
                                          MachineRegisterInfo &MRI,
                                          MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_INSERT) && "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();
  const Register InsertReg = I.getOperand(2).getReg();
  int64_t Index = I.getOperand(3).getImm();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT InsertRegTy = MRI.getType(InsertReg);

  // For now, handle vector types only.
  if (!DstTy.isVector())
    return false;

  if (Index % InsertRegTy.getSizeInBits() != 0)
    return false; // Not a subvector insert.

  if (Index == 0 && MRI.getVRegDef(SrcReg)->isImplicitDef()) {
    // Replace with a subreg copy.
    if (!emitInsertSubreg(DstReg, InsertReg, I, MRI, MF))
      return false;

    I.eraseFromParent();
    return true;
  }

  bool HasAVX = STI.hasAVX();
  bool HasAVX512 = STI.hasAVX512();
  bool HasVLX = STI.hasVLX();

  if (DstTy.getSizeInBits() == 256 && InsertRegTy.getSizeInBits() == 128) {
    if (HasVLX)
      I.setDesc(TII.get(X86::VINSERTF32x4Z256rr));
    else if (HasAVX)
      I.setDesc(TII.get(X86::VINSERTF128rr));
    else
      return false;
  } else if (DstTy.getSizeInBits() == 512 && HasAVX512) {
    if (InsertRegTy.getSizeInBits() == 128)
      I.setDesc(TII.get(X86::VINSERTF32x4Zrr));
    else if (InsertRegTy.getSizeInBits() == 256)
      I.setDesc(TII.get(X86::VINSERTF64x4Zrr));
    else
      return false;
  } else
    return false;

  // Convert to X86 VINSERT immediate.
  Index = Index / InsertRegTy.getSizeInBits();

  I.getOperand(3).setImm(Index);

  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

bool X86InstructionSelector::selectUnmergeValues(
    MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) {
  assert((I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES) &&
         "unexpected instruction");

  // Split to extracts.
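  // e.g. unmerging a 256-bit vector into two 128-bit halves becomes two
  // G_EXTRACTs at bit offsets 0 and 128, each re-selected recursively below.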
  unsigned NumDefs = I.getNumOperands() - 1;
  Register SrcReg = I.getOperand(NumDefs).getReg();
  unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();

  for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
    MachineInstr &ExtrInst =
        *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                 TII.get(TargetOpcode::G_EXTRACT), I.getOperand(Idx).getReg())
             .addReg(SrcReg)
             .addImm(Idx * DefSize);

    if (!select(ExtrInst))
      return false;
  }

  I.eraseFromParent();
  return true;
}

bool X86InstructionSelector::selectMergeValues(
    MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) {
  assert((I.getOpcode() == TargetOpcode::G_MERGE_VALUES ||
          I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS) &&
         "unexpected instruction");

  // Split to inserts.
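  // e.g. a 256-bit concat of two 128-bit sources copies src0 into the low
  // lane via a subregister copy, then chains a G_INSERT of src1 at bit
  // offset 128; each G_INSERT is re-selected recursively below.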
1361   Register DstReg = I.getOperand(0).getReg();
1362   Register SrcReg0 = I.getOperand(1).getReg();
1363 
1364   const LLT DstTy = MRI.getType(DstReg);
1365   const LLT SrcTy = MRI.getType(SrcReg0);
1366   unsigned SrcSize = SrcTy.getSizeInBits();
1367 
1368   const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI);
1369 
1370   // For the first src use insertSubReg.
1371   Register DefReg = MRI.createGenericVirtualRegister(DstTy);
1372   MRI.setRegBank(DefReg, RegBank);
1373   if (!emitInsertSubreg(DefReg, I.getOperand(1).getReg(), I, MRI, MF))
1374     return false;
1375 
1376   for (unsigned Idx = 2; Idx < I.getNumOperands(); ++Idx) {
1377     Register Tmp = MRI.createGenericVirtualRegister(DstTy);
1378     MRI.setRegBank(Tmp, RegBank);
1379 
1380     MachineInstr &InsertInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
1381                                         TII.get(TargetOpcode::G_INSERT), Tmp)
1382                                     .addReg(DefReg)
1383                                     .addReg(I.getOperand(Idx).getReg())
1384                                     .addImm((Idx - 1) * SrcSize);
1385 
1386     DefReg = Tmp;
1387 
1388     if (!select(InsertInst))
1389       return false;
1390   }
1391 
1392   MachineInstr &CopyInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
1393                                     TII.get(TargetOpcode::COPY), DstReg)
1394                                 .addReg(DefReg);
1395 
1396   if (!select(CopyInst))
1397     return false;
1398 
1399   I.eraseFromParent();
1400   return true;
1401 }
1402 
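// Lower G_BRCOND to a test of the condition's low bit followed by a
// conditional branch. Sketch of the emitted sequence (assumed operands):
//
//   TEST8ri %cond, 1          ; ZF = ((%cond & 1) == 0)
//   JCC_1 %bb.dest, COND_NE   ; taken when bit 0 of %cond is set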
bool X86InstructionSelector::selectCondBranch(MachineInstr &I,
                                              MachineRegisterInfo &MRI,
                                              MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_BRCOND) && "unexpected instruction");

  const Register CondReg = I.getOperand(0).getReg();
  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();

  MachineInstr &TestInst =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::TEST8ri))
           .addReg(CondReg)
           .addImm(1);
  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::JCC_1))
      .addMBB(DestMBB)
      .addImm(X86::COND_NE);

  constrainSelectedInstRegOperands(TestInst, TII, TRI, RBI);

  I.eraseFromParent();
  return true;
}

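// Materialize a G_FCONSTANT by placing the value in the constant pool and
// loading it back with the FP/vector load picked by getLoadStoreOp. Under
// the 64-bit large code model the pool address does not fit in a
// displacement, so it is first materialized with MOV64ri; otherwise the load
// references the pool directly (RIP-relative under the 64-bit small model).
// x86-32 PIC constant pools are not handled yet.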
bool X86InstructionSelector::materializeFP(MachineInstr &I,
                                           MachineRegisterInfo &MRI,
                                           MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_FCONSTANT) &&
         "unexpected instruction");

  // Can't handle alternate code models yet.
  CodeModel::Model CM = TM.getCodeModel();
  if (CM != CodeModel::Small && CM != CodeModel::Large)
    return false;

  const Register DstReg = I.getOperand(0).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  // Align the constant-pool entry to the value's size in bytes, not bits.
  unsigned Align = DstTy.getSizeInBits() / 8;
  const DebugLoc &DbgLoc = I.getDebugLoc();

  unsigned Opc = getLoadStoreOp(DstTy, RegBank, TargetOpcode::G_LOAD, Align);

  // Create the load from the constant pool.
  const ConstantFP *CFP = I.getOperand(1).getFPImm();
  unsigned CPI = MF.getConstantPool()->getConstantPoolIndex(CFP, Align);
  MachineInstr *LoadInst = nullptr;
  unsigned char OpFlag = STI.classifyLocalReference(nullptr);

  if (CM == CodeModel::Large && STI.is64Bit()) {
    // Under the x86-64 non-small code models, addresses (of globals, constant
    // pool entries, and friends) are 64 bits wide, so they cannot be folded
    // into an immediate displacement field.

    Register AddrReg = MRI.createVirtualRegister(&X86::GR64RegClass);
    BuildMI(*I.getParent(), I, DbgLoc, TII.get(X86::MOV64ri), AddrReg)
        .addConstantPoolIndex(CPI, 0, OpFlag);

    MachineMemOperand *MMO = MF.getMachineMemOperand(
        MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
        MF.getDataLayout().getPointerSize(), Align);

    LoadInst =
        addDirectMem(BuildMI(*I.getParent(), I, DbgLoc, TII.get(Opc), DstReg),
                     AddrReg)
            .addMemOperand(MMO);

  } else if (CM == CodeModel::Small || !STI.is64Bit()) {
    // Handle the case when the address fits in the immediate field. This is
    // always true on x86-32, and true on x86-64 under -mcmodel=small.

    // x86-32 PIC requires a PIC base register for constant pools.
    unsigned PICBase = 0;
    if (OpFlag == X86II::MO_PIC_BASE_OFFSET || OpFlag == X86II::MO_GOTOFF) {
      // PICBase can be allocated by TII.getGlobalBaseReg(&MF); in DAG ISel
      // the code that initializes it is generated by the CGBR pass.
      return false; // TODO: support this mode.
    } else if (STI.is64Bit() && TM.getCodeModel() == CodeModel::Small)
      PICBase = X86::RIP;

    LoadInst = addConstantPoolReference(
        BuildMI(*I.getParent(), I, DbgLoc, TII.get(Opc), DstReg), CPI, PICBase,
        OpFlag);
  } else
    return false;

  constrainSelectedInstRegOperands(*LoadInst, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

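// G_IMPLICIT_DEF and G_PHI map directly onto the target-independent
// IMPLICIT_DEF and PHI opcodes; the only extra work is constraining the
// destination to a register class when only a register bank was assigned.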
bool X86InstructionSelector::selectImplicitDefOrPHI(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert((I.getOpcode() == TargetOpcode::G_IMPLICIT_DEF ||
          I.getOpcode() == TargetOpcode::G_PHI) &&
         "unexpected instruction");

  Register DstReg = I.getOperand(0).getReg();

  if (!MRI.getRegClassOrNull(DstReg)) {
    const LLT DstTy = MRI.getType(DstReg);
    const TargetRegisterClass *RC = getRegClass(DstTy, DstReg, MRI);

    if (!RBI.constrainGenericRegister(DstReg, *RC, MRI)) {
      LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                        << " operand\n");
      return false;
    }
  }

  if (I.getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
    I.setDesc(TII.get(X86::IMPLICIT_DEF));
  else
    I.setDesc(TII.get(X86::PHI));

  return true;
}

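// Lower G_SDIV/G_SREM/G_UDIV/G_UREM to the x86 DIV/IDIV idiom. Sketch of the
// emitted sequence for a 32-bit signed division (operand names assumed):
//
//   $eax = COPY %op1
//   CDQ                 ; sign-extend EAX into EDX
//   IDIV32r %op2        ; quotient in EAX, remainder in EDX
//   %dst = COPY $eax    ; $edx instead for G_SREM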
bool X86InstructionSelector::selectDivRem(MachineInstr &I,
                                          MachineRegisterInfo &MRI,
                                          MachineFunction &MF) const {
  // The implementation of this function is taken from X86FastISel.
  assert((I.getOpcode() == TargetOpcode::G_SDIV ||
          I.getOpcode() == TargetOpcode::G_SREM ||
          I.getOpcode() == TargetOpcode::G_UDIV ||
          I.getOpcode() == TargetOpcode::G_UREM) &&
         "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register Op1Reg = I.getOperand(1).getReg();
  const Register Op2Reg = I.getOperand(2).getReg();

  const LLT RegTy = MRI.getType(DstReg);
  assert(RegTy == MRI.getType(Op1Reg) && RegTy == MRI.getType(Op2Reg) &&
         "Arguments and return value types must match");

  const RegisterBank *RegRB = RBI.getRegBank(DstReg, MRI, TRI);
  if (!RegRB || RegRB->getID() != X86::GPRRegBankID)
    return false;

  const static unsigned NumTypes = 4; // i8, i16, i32, i64
  const static unsigned NumOps = 4;   // SDiv, SRem, UDiv, URem
  const static bool S = true;         // IsSigned
  const static bool U = false;        // !IsSigned
  const static unsigned Copy = TargetOpcode::COPY;
  // For the X86 IDIV instruction, in most cases the dividend
  // (numerator) must be in a specific register pair highreg:lowreg,
  // producing the quotient in lowreg and the remainder in highreg.
  // For most data types, to set up the instruction, the dividend is
  // copied into lowreg, and lowreg is sign-extended into highreg.  The
  // exception is i8, where the dividend is defined as a single register rather
  // than a register pair, and we therefore directly sign-extend the dividend
  // into lowreg, instead of copying, and ignore the highreg.
  const static struct DivRemEntry {
    // The following portion depends only on the data type.
    unsigned SizeInBits;
    unsigned LowInReg;  // low part of the register pair
    unsigned HighInReg; // high part of the register pair
    // The following portion depends on both the data type and the operation.
    struct DivRemResult {
      unsigned OpDivRem;        // The specific DIV/IDIV opcode to use.
      unsigned OpSignExtend;    // Opcode for sign-extending lowreg into
                                // highreg, or copying a zero into highreg.
      unsigned OpCopy;          // Opcode for copying dividend into lowreg, or
                                // zero/sign-extending into lowreg for i8.
      unsigned DivRemResultReg; // Register containing the desired result.
      bool IsOpSigned;          // Whether to use signed or unsigned form.
    } ResultTable[NumOps];
  } OpTable[NumTypes] = {
      {8,
       X86::AX,
       0,
       {
           {X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S}, // SDiv
           {X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S}, // SRem
           {X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U},  // UDiv
           {X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U},  // URem
       }},                                                // i8
      {16,
       X86::AX,
       X86::DX,
       {
           {X86::IDIV16r, X86::CWD, Copy, X86::AX, S},    // SDiv
           {X86::IDIV16r, X86::CWD, Copy, X86::DX, S},    // SRem
           {X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U}, // UDiv
           {X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U}, // URem
       }},                                                // i16
      {32,
       X86::EAX,
       X86::EDX,
       {
           {X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S},    // SDiv
           {X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S},    // SRem
           {X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U}, // UDiv
           {X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U}, // URem
       }},                                                 // i32
      {64,
       X86::RAX,
       X86::RDX,
       {
           {X86::IDIV64r, X86::CQO, Copy, X86::RAX, S},    // SDiv
           {X86::IDIV64r, X86::CQO, Copy, X86::RDX, S},    // SRem
           {X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U}, // UDiv
           {X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U}, // URem
       }},                                                 // i64
  };

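  // Pick the OpTable row whose width matches the operand type; any other
  // width is rejected.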
  auto OpEntryIt = std::find_if(std::begin(OpTable), std::end(OpTable),
                                [RegTy](const DivRemEntry &El) {
                                  return El.SizeInBits == RegTy.getSizeInBits();
                                });
  if (OpEntryIt == std::end(OpTable))
    return false;

  unsigned OpIndex;
  switch (I.getOpcode()) {
  default:
    llvm_unreachable("Unexpected div/rem opcode");
  case TargetOpcode::G_SDIV:
    OpIndex = 0;
    break;
  case TargetOpcode::G_SREM:
    OpIndex = 1;
    break;
  case TargetOpcode::G_UDIV:
    OpIndex = 2;
    break;
  case TargetOpcode::G_UREM:
    OpIndex = 3;
    break;
  }

  const DivRemEntry &TypeEntry = *OpEntryIt;
  const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];

  const TargetRegisterClass *RegRC = getRegClass(RegTy, *RegRB);
  if (!RBI.constrainGenericRegister(Op1Reg, *RegRC, MRI) ||
      !RBI.constrainGenericRegister(Op2Reg, *RegRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *RegRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }

  // Move op1 into low-order input register.
  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpEntry.OpCopy),
          TypeEntry.LowInReg)
      .addReg(Op1Reg);
  // Zero-extend or sign-extend into high-order input register.
  if (OpEntry.OpSignExtend) {
    if (OpEntry.IsOpSigned)
      BuildMI(*I.getParent(), I, I.getDebugLoc(),
              TII.get(OpEntry.OpSignExtend));
    else {
      Register Zero32 = MRI.createVirtualRegister(&X86::GR32RegClass);
      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::MOV32r0),
              Zero32);

      // Copy the zero into the appropriate sub/super/identical physical
      // register. Unfortunately the operations needed are not uniform enough
      // to fit neatly into the table above.
      if (RegTy.getSizeInBits() == 16) {
        BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy),
                TypeEntry.HighInReg)
            .addReg(Zero32, 0, X86::sub_16bit);
      } else if (RegTy.getSizeInBits() == 32) {
        BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy),
                TypeEntry.HighInReg)
            .addReg(Zero32);
      } else if (RegTy.getSizeInBits() == 64) {
        BuildMI(*I.getParent(), I, I.getDebugLoc(),
                TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
            .addImm(0)
            .addReg(Zero32)
            .addImm(X86::sub_32bit);
      }
    }
  }
  // Generate the DIV/IDIV instruction.
  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpEntry.OpDivRem))
      .addReg(Op2Reg);
  // For the i8 remainder we cannot reference AH directly, as we would end
  // up with bogus copies like %r9b = COPY %ah. Reference AX instead, to
  // prevent an AH reference inside a REX-prefixed instruction.
  //
  // The current assumption of the fast register allocator is that isel
  // won't generate explicit references to the GR8_NOREX registers. If
  // the allocator and/or the backend get enhanced to be more robust in
  // that regard, this can be, and should be, removed.
  if ((I.getOpcode() == TargetOpcode::G_SREM ||
       I.getOpcode() == TargetOpcode::G_UREM) &&
      OpEntry.DivRemResultReg == X86::AH && STI.is64Bit()) {
    Register SourceSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass);
    Register ResultSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass);
    BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy), SourceSuperReg)
        .addReg(X86::AX);

    // Shift AX right by 8 bits instead of using AH.
    BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::SHR16ri),
            ResultSuperReg)
        .addReg(SourceSuperReg)
        .addImm(8);

    // Now reference the 8-bit subreg of the result.
    BuildMI(*I.getParent(), I, I.getDebugLoc(),
            TII.get(TargetOpcode::SUBREG_TO_REG))
        .addDef(DstReg)
        .addImm(0)
        .addReg(ResultSuperReg)
        .addImm(X86::sub_8bit);
  } else {
    BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY),
            DstReg)
        .addReg(OpEntry.DivRemResultReg);
  }
  I.eraseFromParent();
  return true;
}

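// The only side-effecting intrinsic handled so far is llvm.trap, which is
// lowered to the TRAP (ud2) instruction.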
bool X86InstructionSelector::selectIntrinsicWSideEffects(
    MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const {
  assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS &&
         "unexpected instruction");

  if (I.getOperand(0).getIntrinsicID() != Intrinsic::trap)
    return false;

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::TRAP));

  I.eraseFromParent();
  return true;
}

InstructionSelector *
llvm::createX86InstructionSelector(const X86TargetMachine &TM,
                                   X86Subtarget &Subtarget,
                                   X86RegisterBankInfo &RBI) {
  return new X86InstructionSelector(TM, Subtarget, RBI);
}