1 //===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains code to lower X86 MachineInstrs to their corresponding
10 // MCInst records.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "MCTargetDesc/X86ATTInstPrinter.h"
15 #include "MCTargetDesc/X86BaseInfo.h"
16 #include "MCTargetDesc/X86InstComments.h"
17 #include "MCTargetDesc/X86ShuffleDecode.h"
18 #include "MCTargetDesc/X86TargetStreamer.h"
19 #include "X86AsmPrinter.h"
20 #include "X86RegisterInfo.h"
21 #include "X86ShuffleDecodeConstantPool.h"
22 #include "X86Subtarget.h"
23 #include "llvm/ADT/Optional.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/iterator_range.h"
26 #include "llvm/CodeGen/MachineConstantPool.h"
27 #include "llvm/CodeGen/MachineFunction.h"
28 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
29 #include "llvm/CodeGen/MachineOperand.h"
30 #include "llvm/CodeGen/StackMaps.h"
31 #include "llvm/IR/DataLayout.h"
32 #include "llvm/IR/GlobalValue.h"
33 #include "llvm/IR/Mangler.h"
34 #include "llvm/MC/MCAsmInfo.h"
35 #include "llvm/MC/MCCodeEmitter.h"
36 #include "llvm/MC/MCContext.h"
37 #include "llvm/MC/MCExpr.h"
38 #include "llvm/MC/MCFixup.h"
39 #include "llvm/MC/MCInst.h"
40 #include "llvm/MC/MCInstBuilder.h"
41 #include "llvm/MC/MCSection.h"
42 #include "llvm/MC/MCSectionELF.h"
43 #include "llvm/MC/MCStreamer.h"
44 #include "llvm/MC/MCSymbol.h"
45 #include "llvm/MC/MCSymbolELF.h"
46 #include "llvm/Target/TargetLoweringObjectFile.h"
47 #include "llvm/Target/TargetMachine.h"
48 
49 using namespace llvm;
50 
51 namespace {
52 
53 /// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst.
54 class X86MCInstLower {
55   MCContext &Ctx;
56   const MachineFunction &MF;
57   const TargetMachine &TM;
58   const MCAsmInfo &MAI;
59   X86AsmPrinter &AsmPrinter;
60 
61 public:
62   X86MCInstLower(const MachineFunction &MF, X86AsmPrinter &asmprinter);
63 
64   Optional<MCOperand> LowerMachineOperand(const MachineInstr *MI,
65                                           const MachineOperand &MO) const;
66   void Lower(const MachineInstr *MI, MCInst &OutMI) const;
67 
68   MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const;
69   MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
70 
71 private:
72   MachineModuleInfoMachO &getMachOMMI() const;
73 };
74 
75 } // end anonymous namespace
76 
77 /// A RAII helper which defines a region of instructions which can't have
78 /// padding added between them for correctness.
79 struct NoAutoPaddingScope {
80   MCStreamer &OS;
81   const bool OldAllowAutoPadding;
NoAutoPaddingScopeNoAutoPaddingScope82   NoAutoPaddingScope(MCStreamer &OS)
83       : OS(OS), OldAllowAutoPadding(OS.getAllowAutoPadding()) {
84     changeAndComment(false);
85   }
~NoAutoPaddingScopeNoAutoPaddingScope86   ~NoAutoPaddingScope() { changeAndComment(OldAllowAutoPadding); }
changeAndCommentNoAutoPaddingScope87   void changeAndComment(bool b) {
88     if (b == OS.getAllowAutoPadding())
89       return;
90     OS.setAllowAutoPadding(b);
91     if (b)
92       OS.emitRawComment("autopadding");
93     else
94       OS.emitRawComment("noautopadding");
95   }
96 };
97 
// Emit a minimal sequence of nops spanning NumBytes bytes.
//
// This is only a forward declaration. The function has internal linkage
// (static), so its definition has to appear elsewhere in this translation
// unit; it is declared here because
// X86AsmPrinter::StackMapShadowTracker::emitShadowPadding below calls it.
static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
                        const X86Subtarget *Subtarget);
101 
count(MCInst & Inst,const MCSubtargetInfo & STI,MCCodeEmitter * CodeEmitter)102 void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst,
103                                                  const MCSubtargetInfo &STI,
104                                                  MCCodeEmitter *CodeEmitter) {
105   if (InShadow) {
106     SmallString<256> Code;
107     SmallVector<MCFixup, 4> Fixups;
108     raw_svector_ostream VecOS(Code);
109     CodeEmitter->encodeInstruction(Inst, VecOS, Fixups, STI);
110     CurrentShadowSize += Code.size();
111     if (CurrentShadowSize >= RequiredShadowSize)
112       InShadow = false; // The shadow is big enough. Stop counting.
113   }
114 }
115 
emitShadowPadding(MCStreamer & OutStreamer,const MCSubtargetInfo & STI)116 void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding(
117     MCStreamer &OutStreamer, const MCSubtargetInfo &STI) {
118   if (InShadow && CurrentShadowSize < RequiredShadowSize) {
119     InShadow = false;
120     emitX86Nops(OutStreamer, RequiredShadowSize - CurrentShadowSize,
121                 &MF->getSubtarget<X86Subtarget>());
122   }
123 }
124 
EmitAndCountInstruction(MCInst & Inst)125 void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) {
126   OutStreamer->emitInstruction(Inst, getSubtargetInfo());
127   SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get());
128 }
129 
X86MCInstLower(const MachineFunction & mf,X86AsmPrinter & asmprinter)130 X86MCInstLower::X86MCInstLower(const MachineFunction &mf,
131                                X86AsmPrinter &asmprinter)
132     : Ctx(mf.getContext()), MF(mf), TM(mf.getTarget()), MAI(*TM.getMCAsmInfo()),
133       AsmPrinter(asmprinter) {}
134 
getMachOMMI() const135 MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
136   return MF.getMMI().getObjFileInfo<MachineModuleInfoMachO>();
137 }
138 
139 /// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol
140 /// operand to an MCSymbol.
GetSymbolFromOperand(const MachineOperand & MO) const141 MCSymbol *X86MCInstLower::GetSymbolFromOperand(const MachineOperand &MO) const {
142   const Triple &TT = TM.getTargetTriple();
143   if (MO.isGlobal() && TT.isOSBinFormatELF())
144     return AsmPrinter.getSymbolPreferLocal(*MO.getGlobal());
145 
146   const DataLayout &DL = MF.getDataLayout();
147   assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) &&
148          "Isn't a symbol reference");
149 
150   MCSymbol *Sym = nullptr;
151   SmallString<128> Name;
152   StringRef Suffix;
153 
154   switch (MO.getTargetFlags()) {
155   case X86II::MO_DLLIMPORT:
156     // Handle dllimport linkage.
157     Name += "__imp_";
158     break;
159   case X86II::MO_COFFSTUB:
160     Name += ".refptr.";
161     break;
162   case X86II::MO_DARWIN_NONLAZY:
163   case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
164     Suffix = "$non_lazy_ptr";
165     break;
166   }
167 
168   if (!Suffix.empty())
169     Name += DL.getPrivateGlobalPrefix();
170 
171   if (MO.isGlobal()) {
172     const GlobalValue *GV = MO.getGlobal();
173     AsmPrinter.getNameWithPrefix(Name, GV);
174   } else if (MO.isSymbol()) {
175     Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL);
176   } else if (MO.isMBB()) {
177     assert(Suffix.empty());
178     Sym = MO.getMBB()->getSymbol();
179   }
180 
181   Name += Suffix;
182   if (!Sym)
183     Sym = Ctx.getOrCreateSymbol(Name);
184 
185   // If the target flags on the operand changes the name of the symbol, do that
186   // before we return the symbol.
187   switch (MO.getTargetFlags()) {
188   default:
189     break;
190   case X86II::MO_COFFSTUB: {
191     MachineModuleInfoCOFF &MMICOFF =
192         MF.getMMI().getObjFileInfo<MachineModuleInfoCOFF>();
193     MachineModuleInfoImpl::StubValueTy &StubSym = MMICOFF.getGVStubEntry(Sym);
194     if (!StubSym.getPointer()) {
195       assert(MO.isGlobal() && "Extern symbol not handled yet");
196       StubSym = MachineModuleInfoImpl::StubValueTy(
197           AsmPrinter.getSymbol(MO.getGlobal()), true);
198     }
199     break;
200   }
201   case X86II::MO_DARWIN_NONLAZY:
202   case X86II::MO_DARWIN_NONLAZY_PIC_BASE: {
203     MachineModuleInfoImpl::StubValueTy &StubSym =
204         getMachOMMI().getGVStubEntry(Sym);
205     if (!StubSym.getPointer()) {
206       assert(MO.isGlobal() && "Extern symbol not handled yet");
207       StubSym = MachineModuleInfoImpl::StubValueTy(
208           AsmPrinter.getSymbol(MO.getGlobal()),
209           !MO.getGlobal()->hasInternalLinkage());
210     }
211     break;
212   }
213   }
214 
215   return Sym;
216 }
217 
LowerSymbolOperand(const MachineOperand & MO,MCSymbol * Sym) const218 MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
219                                              MCSymbol *Sym) const {
220   // FIXME: We would like an efficient form for this, so we don't have to do a
221   // lot of extra uniquing.
222   const MCExpr *Expr = nullptr;
223   MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
224 
225   switch (MO.getTargetFlags()) {
226   default:
227     llvm_unreachable("Unknown target flag on GV operand");
228   case X86II::MO_NO_FLAG: // No flag.
229   // These affect the name of the symbol, not any suffix.
230   case X86II::MO_DARWIN_NONLAZY:
231   case X86II::MO_DLLIMPORT:
232   case X86II::MO_COFFSTUB:
233     break;
234 
235   case X86II::MO_TLVP:
236     RefKind = MCSymbolRefExpr::VK_TLVP;
237     break;
238   case X86II::MO_TLVP_PIC_BASE:
239     Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
240     // Subtract the pic base.
241     Expr = MCBinaryExpr::createSub(
242         Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
243     break;
244   case X86II::MO_SECREL:
245     RefKind = MCSymbolRefExpr::VK_SECREL;
246     break;
247   case X86II::MO_TLSGD:
248     RefKind = MCSymbolRefExpr::VK_TLSGD;
249     break;
250   case X86II::MO_TLSLD:
251     RefKind = MCSymbolRefExpr::VK_TLSLD;
252     break;
253   case X86II::MO_TLSLDM:
254     RefKind = MCSymbolRefExpr::VK_TLSLDM;
255     break;
256   case X86II::MO_GOTTPOFF:
257     RefKind = MCSymbolRefExpr::VK_GOTTPOFF;
258     break;
259   case X86II::MO_INDNTPOFF:
260     RefKind = MCSymbolRefExpr::VK_INDNTPOFF;
261     break;
262   case X86II::MO_TPOFF:
263     RefKind = MCSymbolRefExpr::VK_TPOFF;
264     break;
265   case X86II::MO_DTPOFF:
266     RefKind = MCSymbolRefExpr::VK_DTPOFF;
267     break;
268   case X86II::MO_NTPOFF:
269     RefKind = MCSymbolRefExpr::VK_NTPOFF;
270     break;
271   case X86II::MO_GOTNTPOFF:
272     RefKind = MCSymbolRefExpr::VK_GOTNTPOFF;
273     break;
274   case X86II::MO_GOTPCREL:
275     RefKind = MCSymbolRefExpr::VK_GOTPCREL;
276     break;
277   case X86II::MO_GOT:
278     RefKind = MCSymbolRefExpr::VK_GOT;
279     break;
280   case X86II::MO_GOTOFF:
281     RefKind = MCSymbolRefExpr::VK_GOTOFF;
282     break;
283   case X86II::MO_PLT:
284     RefKind = MCSymbolRefExpr::VK_PLT;
285     break;
286   case X86II::MO_ABS8:
287     RefKind = MCSymbolRefExpr::VK_X86_ABS8;
288     break;
289   case X86II::MO_PIC_BASE_OFFSET:
290   case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
291     Expr = MCSymbolRefExpr::create(Sym, Ctx);
292     // Subtract the pic base.
293     Expr = MCBinaryExpr::createSub(
294         Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
295     if (MO.isJTI()) {
296       assert(MAI.doesSetDirectiveSuppressReloc());
297       // If .set directive is supported, use it to reduce the number of
298       // relocations the assembler will generate for differences between
299       // local labels. This is only safe when the symbols are in the same
300       // section so we are restricting it to jumptable references.
301       MCSymbol *Label = Ctx.createTempSymbol();
302       AsmPrinter.OutStreamer->emitAssignment(Label, Expr);
303       Expr = MCSymbolRefExpr::create(Label, Ctx);
304     }
305     break;
306   }
307 
308   if (!Expr)
309     Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx);
310 
311   if (!MO.isJTI() && !MO.isMBB() && MO.getOffset())
312     Expr = MCBinaryExpr::createAdd(
313         Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
314   return MCOperand::createExpr(Expr);
315 }
316 
317 /// Simplify FOO $imm, %{al,ax,eax,rax} to FOO $imm, for instruction with
318 /// a short fixed-register form.
SimplifyShortImmForm(MCInst & Inst,unsigned Opcode)319 static void SimplifyShortImmForm(MCInst &Inst, unsigned Opcode) {
320   unsigned ImmOp = Inst.getNumOperands() - 1;
321   assert(Inst.getOperand(0).isReg() &&
322          (Inst.getOperand(ImmOp).isImm() || Inst.getOperand(ImmOp).isExpr()) &&
323          ((Inst.getNumOperands() == 3 && Inst.getOperand(1).isReg() &&
324            Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) ||
325           Inst.getNumOperands() == 2) &&
326          "Unexpected instruction!");
327 
328   // Check whether the destination register can be fixed.
329   unsigned Reg = Inst.getOperand(0).getReg();
330   if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
331     return;
332 
333   // If so, rewrite the instruction.
334   MCOperand Saved = Inst.getOperand(ImmOp);
335   Inst = MCInst();
336   Inst.setOpcode(Opcode);
337   Inst.addOperand(Saved);
338 }
339 
340 /// If a movsx instruction has a shorter encoding for the used register
341 /// simplify the instruction to use it instead.
SimplifyMOVSX(MCInst & Inst)342 static void SimplifyMOVSX(MCInst &Inst) {
343   unsigned NewOpcode = 0;
344   unsigned Op0 = Inst.getOperand(0).getReg(), Op1 = Inst.getOperand(1).getReg();
345   switch (Inst.getOpcode()) {
346   default:
347     llvm_unreachable("Unexpected instruction!");
348   case X86::MOVSX16rr8: // movsbw %al, %ax   --> cbtw
349     if (Op0 == X86::AX && Op1 == X86::AL)
350       NewOpcode = X86::CBW;
351     break;
352   case X86::MOVSX32rr16: // movswl %ax, %eax  --> cwtl
353     if (Op0 == X86::EAX && Op1 == X86::AX)
354       NewOpcode = X86::CWDE;
355     break;
356   case X86::MOVSX64rr32: // movslq %eax, %rax --> cltq
357     if (Op0 == X86::RAX && Op1 == X86::EAX)
358       NewOpcode = X86::CDQE;
359     break;
360   }
361 
362   if (NewOpcode != 0) {
363     Inst = MCInst();
364     Inst.setOpcode(NewOpcode);
365   }
366 }
367 
368 /// Simplify things like MOV32rm to MOV32o32a.
SimplifyShortMoveForm(X86AsmPrinter & Printer,MCInst & Inst,unsigned Opcode)369 static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst,
370                                   unsigned Opcode) {
371   // Don't make these simplifications in 64-bit mode; other assemblers don't
372   // perform them because they make the code larger.
373   if (Printer.getSubtarget().is64Bit())
374     return;
375 
376   bool IsStore = Inst.getOperand(0).isReg() && Inst.getOperand(1).isReg();
377   unsigned AddrBase = IsStore;
378   unsigned RegOp = IsStore ? 0 : 5;
379   unsigned AddrOp = AddrBase + 3;
380   assert(
381       Inst.getNumOperands() == 6 && Inst.getOperand(RegOp).isReg() &&
382       Inst.getOperand(AddrBase + X86::AddrBaseReg).isReg() &&
383       Inst.getOperand(AddrBase + X86::AddrScaleAmt).isImm() &&
384       Inst.getOperand(AddrBase + X86::AddrIndexReg).isReg() &&
385       Inst.getOperand(AddrBase + X86::AddrSegmentReg).isReg() &&
386       (Inst.getOperand(AddrOp).isExpr() || Inst.getOperand(AddrOp).isImm()) &&
387       "Unexpected instruction!");
388 
389   // Check whether the destination register can be fixed.
390   unsigned Reg = Inst.getOperand(RegOp).getReg();
391   if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
392     return;
393 
394   // Check whether this is an absolute address.
395   // FIXME: We know TLVP symbol refs aren't, but there should be a better way
396   // to do this here.
397   bool Absolute = true;
398   if (Inst.getOperand(AddrOp).isExpr()) {
399     const MCExpr *MCE = Inst.getOperand(AddrOp).getExpr();
400     if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(MCE))
401       if (SRE->getKind() == MCSymbolRefExpr::VK_TLVP)
402         Absolute = false;
403   }
404 
405   if (Absolute &&
406       (Inst.getOperand(AddrBase + X86::AddrBaseReg).getReg() != 0 ||
407        Inst.getOperand(AddrBase + X86::AddrScaleAmt).getImm() != 1 ||
408        Inst.getOperand(AddrBase + X86::AddrIndexReg).getReg() != 0))
409     return;
410 
411   // If so, rewrite the instruction.
412   MCOperand Saved = Inst.getOperand(AddrOp);
413   MCOperand Seg = Inst.getOperand(AddrBase + X86::AddrSegmentReg);
414   Inst = MCInst();
415   Inst.setOpcode(Opcode);
416   Inst.addOperand(Saved);
417   Inst.addOperand(Seg);
418 }
419 
getRetOpcode(const X86Subtarget & Subtarget)420 static unsigned getRetOpcode(const X86Subtarget &Subtarget) {
421   return Subtarget.is64Bit() ? X86::RETQ : X86::RETL;
422 }
423 
424 Optional<MCOperand>
LowerMachineOperand(const MachineInstr * MI,const MachineOperand & MO) const425 X86MCInstLower::LowerMachineOperand(const MachineInstr *MI,
426                                     const MachineOperand &MO) const {
427   switch (MO.getType()) {
428   default:
429     MI->print(errs());
430     llvm_unreachable("unknown operand type");
431   case MachineOperand::MO_Register:
432     // Ignore all implicit register operands.
433     if (MO.isImplicit())
434       return None;
435     return MCOperand::createReg(MO.getReg());
436   case MachineOperand::MO_Immediate:
437     return MCOperand::createImm(MO.getImm());
438   case MachineOperand::MO_MachineBasicBlock:
439   case MachineOperand::MO_GlobalAddress:
440   case MachineOperand::MO_ExternalSymbol:
441     return LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
442   case MachineOperand::MO_MCSymbol:
443     return LowerSymbolOperand(MO, MO.getMCSymbol());
444   case MachineOperand::MO_JumpTableIndex:
445     return LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()));
446   case MachineOperand::MO_ConstantPoolIndex:
447     return LowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex()));
448   case MachineOperand::MO_BlockAddress:
449     return LowerSymbolOperand(
450         MO, AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()));
451   case MachineOperand::MO_RegisterMask:
452     // Ignore call clobbers.
453     return None;
454   }
455 }
456 
457 // Replace TAILJMP opcodes with their equivalent opcodes that have encoding
458 // information.
convertTailJumpOpcode(unsigned Opcode)459 static unsigned convertTailJumpOpcode(unsigned Opcode) {
460   switch (Opcode) {
461   case X86::TAILJMPr:
462     Opcode = X86::JMP32r;
463     break;
464   case X86::TAILJMPm:
465     Opcode = X86::JMP32m;
466     break;
467   case X86::TAILJMPr64:
468     Opcode = X86::JMP64r;
469     break;
470   case X86::TAILJMPm64:
471     Opcode = X86::JMP64m;
472     break;
473   case X86::TAILJMPr64_REX:
474     Opcode = X86::JMP64r_REX;
475     break;
476   case X86::TAILJMPm64_REX:
477     Opcode = X86::JMP64m_REX;
478     break;
479   case X86::TAILJMPd:
480   case X86::TAILJMPd64:
481     Opcode = X86::JMP_1;
482     break;
483   case X86::TAILJMPd_CC:
484   case X86::TAILJMPd64_CC:
485     Opcode = X86::JCC_1;
486     break;
487   }
488 
489   return Opcode;
490 }
491 
Lower(const MachineInstr * MI,MCInst & OutMI) const492 void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
493   OutMI.setOpcode(MI->getOpcode());
494 
495   for (const MachineOperand &MO : MI->operands())
496     if (auto MaybeMCOp = LowerMachineOperand(MI, MO))
497       OutMI.addOperand(MaybeMCOp.getValue());
498 
499   // Handle a few special cases to eliminate operand modifiers.
500   switch (OutMI.getOpcode()) {
501   case X86::LEA64_32r:
502   case X86::LEA64r:
503   case X86::LEA16r:
504   case X86::LEA32r:
505     // LEA should have a segment register, but it must be empty.
506     assert(OutMI.getNumOperands() == 1 + X86::AddrNumOperands &&
507            "Unexpected # of LEA operands");
508     assert(OutMI.getOperand(1 + X86::AddrSegmentReg).getReg() == 0 &&
509            "LEA has segment specified!");
510     break;
511 
512   case X86::MULX32Hrr:
513   case X86::MULX32Hrm:
514   case X86::MULX64Hrr:
515   case X86::MULX64Hrm: {
516     // Turn into regular MULX by duplicating the destination.
517     unsigned NewOpc;
518     switch (OutMI.getOpcode()) {
519     default: llvm_unreachable("Invalid opcode");
520     case X86::MULX32Hrr: NewOpc = X86::MULX32rr; break;
521     case X86::MULX32Hrm: NewOpc = X86::MULX32rm; break;
522     case X86::MULX64Hrr: NewOpc = X86::MULX64rr; break;
523     case X86::MULX64Hrm: NewOpc = X86::MULX64rm; break;
524     }
525     OutMI.setOpcode(NewOpc);
526     // Duplicate the destination.
527     unsigned DestReg = OutMI.getOperand(0).getReg();
528     OutMI.insert(OutMI.begin(), MCOperand::createReg(DestReg));
529     break;
530   }
531 
532   // Commute operands to get a smaller encoding by using VEX.R instead of VEX.B
533   // if one of the registers is extended, but other isn't.
534   case X86::VMOVZPQILo2PQIrr:
535   case X86::VMOVAPDrr:
536   case X86::VMOVAPDYrr:
537   case X86::VMOVAPSrr:
538   case X86::VMOVAPSYrr:
539   case X86::VMOVDQArr:
540   case X86::VMOVDQAYrr:
541   case X86::VMOVDQUrr:
542   case X86::VMOVDQUYrr:
543   case X86::VMOVUPDrr:
544   case X86::VMOVUPDYrr:
545   case X86::VMOVUPSrr:
546   case X86::VMOVUPSYrr: {
547     if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
548         X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg())) {
549       unsigned NewOpc;
550       switch (OutMI.getOpcode()) {
551       default: llvm_unreachable("Invalid opcode");
552       case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr;   break;
553       case X86::VMOVAPDrr:        NewOpc = X86::VMOVAPDrr_REV;  break;
554       case X86::VMOVAPDYrr:       NewOpc = X86::VMOVAPDYrr_REV; break;
555       case X86::VMOVAPSrr:        NewOpc = X86::VMOVAPSrr_REV;  break;
556       case X86::VMOVAPSYrr:       NewOpc = X86::VMOVAPSYrr_REV; break;
557       case X86::VMOVDQArr:        NewOpc = X86::VMOVDQArr_REV;  break;
558       case X86::VMOVDQAYrr:       NewOpc = X86::VMOVDQAYrr_REV; break;
559       case X86::VMOVDQUrr:        NewOpc = X86::VMOVDQUrr_REV;  break;
560       case X86::VMOVDQUYrr:       NewOpc = X86::VMOVDQUYrr_REV; break;
561       case X86::VMOVUPDrr:        NewOpc = X86::VMOVUPDrr_REV;  break;
562       case X86::VMOVUPDYrr:       NewOpc = X86::VMOVUPDYrr_REV; break;
563       case X86::VMOVUPSrr:        NewOpc = X86::VMOVUPSrr_REV;  break;
564       case X86::VMOVUPSYrr:       NewOpc = X86::VMOVUPSYrr_REV; break;
565       }
566       OutMI.setOpcode(NewOpc);
567     }
568     break;
569   }
570   case X86::VMOVSDrr:
571   case X86::VMOVSSrr: {
572     if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
573         X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) {
574       unsigned NewOpc;
575       switch (OutMI.getOpcode()) {
576       default: llvm_unreachable("Invalid opcode");
577       case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
578       case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
579       }
580       OutMI.setOpcode(NewOpc);
581     }
582     break;
583   }
584 
585   case X86::VPCMPBZ128rmi:  case X86::VPCMPBZ128rmik:
586   case X86::VPCMPBZ128rri:  case X86::VPCMPBZ128rrik:
587   case X86::VPCMPBZ256rmi:  case X86::VPCMPBZ256rmik:
588   case X86::VPCMPBZ256rri:  case X86::VPCMPBZ256rrik:
589   case X86::VPCMPBZrmi:     case X86::VPCMPBZrmik:
590   case X86::VPCMPBZrri:     case X86::VPCMPBZrrik:
591   case X86::VPCMPDZ128rmi:  case X86::VPCMPDZ128rmik:
592   case X86::VPCMPDZ128rmib: case X86::VPCMPDZ128rmibk:
593   case X86::VPCMPDZ128rri:  case X86::VPCMPDZ128rrik:
594   case X86::VPCMPDZ256rmi:  case X86::VPCMPDZ256rmik:
595   case X86::VPCMPDZ256rmib: case X86::VPCMPDZ256rmibk:
596   case X86::VPCMPDZ256rri:  case X86::VPCMPDZ256rrik:
597   case X86::VPCMPDZrmi:     case X86::VPCMPDZrmik:
598   case X86::VPCMPDZrmib:    case X86::VPCMPDZrmibk:
599   case X86::VPCMPDZrri:     case X86::VPCMPDZrrik:
600   case X86::VPCMPQZ128rmi:  case X86::VPCMPQZ128rmik:
601   case X86::VPCMPQZ128rmib: case X86::VPCMPQZ128rmibk:
602   case X86::VPCMPQZ128rri:  case X86::VPCMPQZ128rrik:
603   case X86::VPCMPQZ256rmi:  case X86::VPCMPQZ256rmik:
604   case X86::VPCMPQZ256rmib: case X86::VPCMPQZ256rmibk:
605   case X86::VPCMPQZ256rri:  case X86::VPCMPQZ256rrik:
606   case X86::VPCMPQZrmi:     case X86::VPCMPQZrmik:
607   case X86::VPCMPQZrmib:    case X86::VPCMPQZrmibk:
608   case X86::VPCMPQZrri:     case X86::VPCMPQZrrik:
609   case X86::VPCMPWZ128rmi:  case X86::VPCMPWZ128rmik:
610   case X86::VPCMPWZ128rri:  case X86::VPCMPWZ128rrik:
611   case X86::VPCMPWZ256rmi:  case X86::VPCMPWZ256rmik:
612   case X86::VPCMPWZ256rri:  case X86::VPCMPWZ256rrik:
613   case X86::VPCMPWZrmi:     case X86::VPCMPWZrmik:
614   case X86::VPCMPWZrri:     case X86::VPCMPWZrrik: {
615     // Turn immediate 0 into the VPCMPEQ instruction.
616     if (OutMI.getOperand(OutMI.getNumOperands() - 1).getImm() == 0) {
617       unsigned NewOpc;
618       switch (OutMI.getOpcode()) {
619       default: llvm_unreachable("Invalid opcode");
620       case X86::VPCMPBZ128rmi:   NewOpc = X86::VPCMPEQBZ128rm;   break;
621       case X86::VPCMPBZ128rmik:  NewOpc = X86::VPCMPEQBZ128rmk;  break;
622       case X86::VPCMPBZ128rri:   NewOpc = X86::VPCMPEQBZ128rr;   break;
623       case X86::VPCMPBZ128rrik:  NewOpc = X86::VPCMPEQBZ128rrk;  break;
624       case X86::VPCMPBZ256rmi:   NewOpc = X86::VPCMPEQBZ256rm;   break;
625       case X86::VPCMPBZ256rmik:  NewOpc = X86::VPCMPEQBZ256rmk;  break;
626       case X86::VPCMPBZ256rri:   NewOpc = X86::VPCMPEQBZ256rr;   break;
627       case X86::VPCMPBZ256rrik:  NewOpc = X86::VPCMPEQBZ256rrk;  break;
628       case X86::VPCMPBZrmi:      NewOpc = X86::VPCMPEQBZrm;      break;
629       case X86::VPCMPBZrmik:     NewOpc = X86::VPCMPEQBZrmk;     break;
630       case X86::VPCMPBZrri:      NewOpc = X86::VPCMPEQBZrr;      break;
631       case X86::VPCMPBZrrik:     NewOpc = X86::VPCMPEQBZrrk;     break;
632       case X86::VPCMPDZ128rmi:   NewOpc = X86::VPCMPEQDZ128rm;   break;
633       case X86::VPCMPDZ128rmib:  NewOpc = X86::VPCMPEQDZ128rmb;  break;
634       case X86::VPCMPDZ128rmibk: NewOpc = X86::VPCMPEQDZ128rmbk; break;
635       case X86::VPCMPDZ128rmik:  NewOpc = X86::VPCMPEQDZ128rmk;  break;
636       case X86::VPCMPDZ128rri:   NewOpc = X86::VPCMPEQDZ128rr;   break;
637       case X86::VPCMPDZ128rrik:  NewOpc = X86::VPCMPEQDZ128rrk;  break;
638       case X86::VPCMPDZ256rmi:   NewOpc = X86::VPCMPEQDZ256rm;   break;
639       case X86::VPCMPDZ256rmib:  NewOpc = X86::VPCMPEQDZ256rmb;  break;
640       case X86::VPCMPDZ256rmibk: NewOpc = X86::VPCMPEQDZ256rmbk; break;
641       case X86::VPCMPDZ256rmik:  NewOpc = X86::VPCMPEQDZ256rmk;  break;
642       case X86::VPCMPDZ256rri:   NewOpc = X86::VPCMPEQDZ256rr;   break;
643       case X86::VPCMPDZ256rrik:  NewOpc = X86::VPCMPEQDZ256rrk;  break;
644       case X86::VPCMPDZrmi:      NewOpc = X86::VPCMPEQDZrm;      break;
645       case X86::VPCMPDZrmib:     NewOpc = X86::VPCMPEQDZrmb;     break;
646       case X86::VPCMPDZrmibk:    NewOpc = X86::VPCMPEQDZrmbk;    break;
647       case X86::VPCMPDZrmik:     NewOpc = X86::VPCMPEQDZrmk;     break;
648       case X86::VPCMPDZrri:      NewOpc = X86::VPCMPEQDZrr;      break;
649       case X86::VPCMPDZrrik:     NewOpc = X86::VPCMPEQDZrrk;     break;
650       case X86::VPCMPQZ128rmi:   NewOpc = X86::VPCMPEQQZ128rm;   break;
651       case X86::VPCMPQZ128rmib:  NewOpc = X86::VPCMPEQQZ128rmb;  break;
652       case X86::VPCMPQZ128rmibk: NewOpc = X86::VPCMPEQQZ128rmbk; break;
653       case X86::VPCMPQZ128rmik:  NewOpc = X86::VPCMPEQQZ128rmk;  break;
654       case X86::VPCMPQZ128rri:   NewOpc = X86::VPCMPEQQZ128rr;   break;
655       case X86::VPCMPQZ128rrik:  NewOpc = X86::VPCMPEQQZ128rrk;  break;
656       case X86::VPCMPQZ256rmi:   NewOpc = X86::VPCMPEQQZ256rm;   break;
657       case X86::VPCMPQZ256rmib:  NewOpc = X86::VPCMPEQQZ256rmb;  break;
658       case X86::VPCMPQZ256rmibk: NewOpc = X86::VPCMPEQQZ256rmbk; break;
659       case X86::VPCMPQZ256rmik:  NewOpc = X86::VPCMPEQQZ256rmk;  break;
660       case X86::VPCMPQZ256rri:   NewOpc = X86::VPCMPEQQZ256rr;   break;
661       case X86::VPCMPQZ256rrik:  NewOpc = X86::VPCMPEQQZ256rrk;  break;
662       case X86::VPCMPQZrmi:      NewOpc = X86::VPCMPEQQZrm;      break;
663       case X86::VPCMPQZrmib:     NewOpc = X86::VPCMPEQQZrmb;     break;
664       case X86::VPCMPQZrmibk:    NewOpc = X86::VPCMPEQQZrmbk;    break;
665       case X86::VPCMPQZrmik:     NewOpc = X86::VPCMPEQQZrmk;     break;
666       case X86::VPCMPQZrri:      NewOpc = X86::VPCMPEQQZrr;      break;
667       case X86::VPCMPQZrrik:     NewOpc = X86::VPCMPEQQZrrk;     break;
668       case X86::VPCMPWZ128rmi:   NewOpc = X86::VPCMPEQWZ128rm;   break;
669       case X86::VPCMPWZ128rmik:  NewOpc = X86::VPCMPEQWZ128rmk;  break;
670       case X86::VPCMPWZ128rri:   NewOpc = X86::VPCMPEQWZ128rr;   break;
671       case X86::VPCMPWZ128rrik:  NewOpc = X86::VPCMPEQWZ128rrk;  break;
672       case X86::VPCMPWZ256rmi:   NewOpc = X86::VPCMPEQWZ256rm;   break;
673       case X86::VPCMPWZ256rmik:  NewOpc = X86::VPCMPEQWZ256rmk;  break;
674       case X86::VPCMPWZ256rri:   NewOpc = X86::VPCMPEQWZ256rr;   break;
675       case X86::VPCMPWZ256rrik:  NewOpc = X86::VPCMPEQWZ256rrk;  break;
676       case X86::VPCMPWZrmi:      NewOpc = X86::VPCMPEQWZrm;      break;
677       case X86::VPCMPWZrmik:     NewOpc = X86::VPCMPEQWZrmk;     break;
678       case X86::VPCMPWZrri:      NewOpc = X86::VPCMPEQWZrr;      break;
679       case X86::VPCMPWZrrik:     NewOpc = X86::VPCMPEQWZrrk;     break;
680       }
681 
682       OutMI.setOpcode(NewOpc);
683       OutMI.erase(&OutMI.getOperand(OutMI.getNumOperands() - 1));
684       break;
685     }
686 
687     // Turn immediate 6 into the VPCMPGT instruction.
688     if (OutMI.getOperand(OutMI.getNumOperands() - 1).getImm() == 6) {
689       unsigned NewOpc;
690       switch (OutMI.getOpcode()) {
691       default: llvm_unreachable("Invalid opcode");
692       case X86::VPCMPBZ128rmi:   NewOpc = X86::VPCMPGTBZ128rm;   break;
693       case X86::VPCMPBZ128rmik:  NewOpc = X86::VPCMPGTBZ128rmk;  break;
694       case X86::VPCMPBZ128rri:   NewOpc = X86::VPCMPGTBZ128rr;   break;
695       case X86::VPCMPBZ128rrik:  NewOpc = X86::VPCMPGTBZ128rrk;  break;
696       case X86::VPCMPBZ256rmi:   NewOpc = X86::VPCMPGTBZ256rm;   break;
697       case X86::VPCMPBZ256rmik:  NewOpc = X86::VPCMPGTBZ256rmk;  break;
698       case X86::VPCMPBZ256rri:   NewOpc = X86::VPCMPGTBZ256rr;   break;
699       case X86::VPCMPBZ256rrik:  NewOpc = X86::VPCMPGTBZ256rrk;  break;
700       case X86::VPCMPBZrmi:      NewOpc = X86::VPCMPGTBZrm;      break;
701       case X86::VPCMPBZrmik:     NewOpc = X86::VPCMPGTBZrmk;     break;
702       case X86::VPCMPBZrri:      NewOpc = X86::VPCMPGTBZrr;      break;
703       case X86::VPCMPBZrrik:     NewOpc = X86::VPCMPGTBZrrk;     break;
704       case X86::VPCMPDZ128rmi:   NewOpc = X86::VPCMPGTDZ128rm;   break;
705       case X86::VPCMPDZ128rmib:  NewOpc = X86::VPCMPGTDZ128rmb;  break;
706       case X86::VPCMPDZ128rmibk: NewOpc = X86::VPCMPGTDZ128rmbk; break;
707       case X86::VPCMPDZ128rmik:  NewOpc = X86::VPCMPGTDZ128rmk;  break;
708       case X86::VPCMPDZ128rri:   NewOpc = X86::VPCMPGTDZ128rr;   break;
709       case X86::VPCMPDZ128rrik:  NewOpc = X86::VPCMPGTDZ128rrk;  break;
710       case X86::VPCMPDZ256rmi:   NewOpc = X86::VPCMPGTDZ256rm;   break;
711       case X86::VPCMPDZ256rmib:  NewOpc = X86::VPCMPGTDZ256rmb;  break;
712       case X86::VPCMPDZ256rmibk: NewOpc = X86::VPCMPGTDZ256rmbk; break;
713       case X86::VPCMPDZ256rmik:  NewOpc = X86::VPCMPGTDZ256rmk;  break;
714       case X86::VPCMPDZ256rri:   NewOpc = X86::VPCMPGTDZ256rr;   break;
715       case X86::VPCMPDZ256rrik:  NewOpc = X86::VPCMPGTDZ256rrk;  break;
716       case X86::VPCMPDZrmi:      NewOpc = X86::VPCMPGTDZrm;      break;
717       case X86::VPCMPDZrmib:     NewOpc = X86::VPCMPGTDZrmb;     break;
718       case X86::VPCMPDZrmibk:    NewOpc = X86::VPCMPGTDZrmbk;    break;
719       case X86::VPCMPDZrmik:     NewOpc = X86::VPCMPGTDZrmk;     break;
720       case X86::VPCMPDZrri:      NewOpc = X86::VPCMPGTDZrr;      break;
721       case X86::VPCMPDZrrik:     NewOpc = X86::VPCMPGTDZrrk;     break;
722       case X86::VPCMPQZ128rmi:   NewOpc = X86::VPCMPGTQZ128rm;   break;
723       case X86::VPCMPQZ128rmib:  NewOpc = X86::VPCMPGTQZ128rmb;  break;
724       case X86::VPCMPQZ128rmibk: NewOpc = X86::VPCMPGTQZ128rmbk; break;
725       case X86::VPCMPQZ128rmik:  NewOpc = X86::VPCMPGTQZ128rmk;  break;
726       case X86::VPCMPQZ128rri:   NewOpc = X86::VPCMPGTQZ128rr;   break;
727       case X86::VPCMPQZ128rrik:  NewOpc = X86::VPCMPGTQZ128rrk;  break;
728       case X86::VPCMPQZ256rmi:   NewOpc = X86::VPCMPGTQZ256rm;   break;
729       case X86::VPCMPQZ256rmib:  NewOpc = X86::VPCMPGTQZ256rmb;  break;
730       case X86::VPCMPQZ256rmibk: NewOpc = X86::VPCMPGTQZ256rmbk; break;
731       case X86::VPCMPQZ256rmik:  NewOpc = X86::VPCMPGTQZ256rmk;  break;
732       case X86::VPCMPQZ256rri:   NewOpc = X86::VPCMPGTQZ256rr;   break;
733       case X86::VPCMPQZ256rrik:  NewOpc = X86::VPCMPGTQZ256rrk;  break;
734       case X86::VPCMPQZrmi:      NewOpc = X86::VPCMPGTQZrm;      break;
735       case X86::VPCMPQZrmib:     NewOpc = X86::VPCMPGTQZrmb;     break;
736       case X86::VPCMPQZrmibk:    NewOpc = X86::VPCMPGTQZrmbk;    break;
737       case X86::VPCMPQZrmik:     NewOpc = X86::VPCMPGTQZrmk;     break;
738       case X86::VPCMPQZrri:      NewOpc = X86::VPCMPGTQZrr;      break;
739       case X86::VPCMPQZrrik:     NewOpc = X86::VPCMPGTQZrrk;     break;
740       case X86::VPCMPWZ128rmi:   NewOpc = X86::VPCMPGTWZ128rm;   break;
741       case X86::VPCMPWZ128rmik:  NewOpc = X86::VPCMPGTWZ128rmk;  break;
742       case X86::VPCMPWZ128rri:   NewOpc = X86::VPCMPGTWZ128rr;   break;
743       case X86::VPCMPWZ128rrik:  NewOpc = X86::VPCMPGTWZ128rrk;  break;
744       case X86::VPCMPWZ256rmi:   NewOpc = X86::VPCMPGTWZ256rm;   break;
745       case X86::VPCMPWZ256rmik:  NewOpc = X86::VPCMPGTWZ256rmk;  break;
746       case X86::VPCMPWZ256rri:   NewOpc = X86::VPCMPGTWZ256rr;   break;
747       case X86::VPCMPWZ256rrik:  NewOpc = X86::VPCMPGTWZ256rrk;  break;
748       case X86::VPCMPWZrmi:      NewOpc = X86::VPCMPGTWZrm;      break;
749       case X86::VPCMPWZrmik:     NewOpc = X86::VPCMPGTWZrmk;     break;
750       case X86::VPCMPWZrri:      NewOpc = X86::VPCMPGTWZrr;      break;
751       case X86::VPCMPWZrrik:     NewOpc = X86::VPCMPGTWZrrk;     break;
752       }
753 
754       OutMI.setOpcode(NewOpc);
755       OutMI.erase(&OutMI.getOperand(OutMI.getNumOperands() - 1));
756       break;
757     }
758 
759     break;
760   }
761 
  // CALL64r, CALL64pcrel32 - These instructions used to have
  // register inputs modeled as normal uses instead of implicit uses.  As such,
  // we used to truncate off all but the first operand (the callee). This
  // issue seems to have been fixed at some point. This assert verifies that.
766   case X86::CALL64r:
767   case X86::CALL64pcrel32:
768     assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
769     break;
770 
771   case X86::EH_RETURN:
772   case X86::EH_RETURN64: {
773     OutMI = MCInst();
774     OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
775     break;
776   }
777 
778   case X86::CLEANUPRET: {
779     // Replace CLEANUPRET with the appropriate RET.
780     OutMI = MCInst();
781     OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
782     break;
783   }
784 
785   case X86::CATCHRET: {
786     // Replace CATCHRET with the appropriate RET.
787     const X86Subtarget &Subtarget = AsmPrinter.getSubtarget();
788     unsigned ReturnReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
789     OutMI = MCInst();
790     OutMI.setOpcode(getRetOpcode(Subtarget));
791     OutMI.addOperand(MCOperand::createReg(ReturnReg));
792     break;
793   }
794 
795   // TAILJMPd, TAILJMPd64, TailJMPd_cc - Lower to the correct jump
796   // instruction.
797   case X86::TAILJMPr:
798   case X86::TAILJMPr64:
799   case X86::TAILJMPr64_REX:
800   case X86::TAILJMPd:
801   case X86::TAILJMPd64:
802     assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
803     OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
804     break;
805 
806   case X86::TAILJMPd_CC:
807   case X86::TAILJMPd64_CC:
808     assert(OutMI.getNumOperands() == 2 && "Unexpected number of operands!");
809     OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
810     break;
811 
812   case X86::TAILJMPm:
813   case X86::TAILJMPm64:
814   case X86::TAILJMPm64_REX:
815     assert(OutMI.getNumOperands() == X86::AddrNumOperands &&
816            "Unexpected number of operands!");
817     OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
818     break;
819 
820   case X86::DEC16r:
821   case X86::DEC32r:
822   case X86::INC16r:
823   case X86::INC32r:
824     // If we aren't in 64-bit mode we can use the 1-byte inc/dec instructions.
825     if (!AsmPrinter.getSubtarget().is64Bit()) {
826       unsigned Opcode;
827       switch (OutMI.getOpcode()) {
828       default: llvm_unreachable("Invalid opcode");
829       case X86::DEC16r: Opcode = X86::DEC16r_alt; break;
830       case X86::DEC32r: Opcode = X86::DEC32r_alt; break;
831       case X86::INC16r: Opcode = X86::INC16r_alt; break;
832       case X86::INC32r: Opcode = X86::INC32r_alt; break;
833       }
834       OutMI.setOpcode(Opcode);
835     }
836     break;
837 
838   // We don't currently select the correct instruction form for instructions
839   // which have a short %eax, etc. form. Handle this by custom lowering, for
840   // now.
841   //
842   // Note, we are currently not handling the following instructions:
843   // MOV64ao8, MOV64o8a
844   // XCHG16ar, XCHG32ar, XCHG64ar
845   case X86::MOV8mr_NOREX:
846   case X86::MOV8mr:
847   case X86::MOV8rm_NOREX:
848   case X86::MOV8rm:
849   case X86::MOV16mr:
850   case X86::MOV16rm:
851   case X86::MOV32mr:
852   case X86::MOV32rm: {
853     unsigned NewOpc;
854     switch (OutMI.getOpcode()) {
855     default: llvm_unreachable("Invalid opcode");
856     case X86::MOV8mr_NOREX:
857     case X86::MOV8mr:  NewOpc = X86::MOV8o32a; break;
858     case X86::MOV8rm_NOREX:
859     case X86::MOV8rm:  NewOpc = X86::MOV8ao32; break;
860     case X86::MOV16mr: NewOpc = X86::MOV16o32a; break;
861     case X86::MOV16rm: NewOpc = X86::MOV16ao32; break;
862     case X86::MOV32mr: NewOpc = X86::MOV32o32a; break;
863     case X86::MOV32rm: NewOpc = X86::MOV32ao32; break;
864     }
865     SimplifyShortMoveForm(AsmPrinter, OutMI, NewOpc);
866     break;
867   }
868 
869   case X86::ADC8ri: case X86::ADC16ri: case X86::ADC32ri: case X86::ADC64ri32:
870   case X86::ADD8ri: case X86::ADD16ri: case X86::ADD32ri: case X86::ADD64ri32:
871   case X86::AND8ri: case X86::AND16ri: case X86::AND32ri: case X86::AND64ri32:
872   case X86::CMP8ri: case X86::CMP16ri: case X86::CMP32ri: case X86::CMP64ri32:
873   case X86::OR8ri:  case X86::OR16ri:  case X86::OR32ri:  case X86::OR64ri32:
874   case X86::SBB8ri: case X86::SBB16ri: case X86::SBB32ri: case X86::SBB64ri32:
875   case X86::SUB8ri: case X86::SUB16ri: case X86::SUB32ri: case X86::SUB64ri32:
876   case X86::TEST8ri:case X86::TEST16ri:case X86::TEST32ri:case X86::TEST64ri32:
877   case X86::XOR8ri: case X86::XOR16ri: case X86::XOR32ri: case X86::XOR64ri32: {
878     unsigned NewOpc;
879     switch (OutMI.getOpcode()) {
880     default: llvm_unreachable("Invalid opcode");
881     case X86::ADC8ri:     NewOpc = X86::ADC8i8;    break;
882     case X86::ADC16ri:    NewOpc = X86::ADC16i16;  break;
883     case X86::ADC32ri:    NewOpc = X86::ADC32i32;  break;
884     case X86::ADC64ri32:  NewOpc = X86::ADC64i32;  break;
885     case X86::ADD8ri:     NewOpc = X86::ADD8i8;    break;
886     case X86::ADD16ri:    NewOpc = X86::ADD16i16;  break;
887     case X86::ADD32ri:    NewOpc = X86::ADD32i32;  break;
888     case X86::ADD64ri32:  NewOpc = X86::ADD64i32;  break;
889     case X86::AND8ri:     NewOpc = X86::AND8i8;    break;
890     case X86::AND16ri:    NewOpc = X86::AND16i16;  break;
891     case X86::AND32ri:    NewOpc = X86::AND32i32;  break;
892     case X86::AND64ri32:  NewOpc = X86::AND64i32;  break;
893     case X86::CMP8ri:     NewOpc = X86::CMP8i8;    break;
894     case X86::CMP16ri:    NewOpc = X86::CMP16i16;  break;
895     case X86::CMP32ri:    NewOpc = X86::CMP32i32;  break;
896     case X86::CMP64ri32:  NewOpc = X86::CMP64i32;  break;
897     case X86::OR8ri:      NewOpc = X86::OR8i8;     break;
898     case X86::OR16ri:     NewOpc = X86::OR16i16;   break;
899     case X86::OR32ri:     NewOpc = X86::OR32i32;   break;
900     case X86::OR64ri32:   NewOpc = X86::OR64i32;   break;
901     case X86::SBB8ri:     NewOpc = X86::SBB8i8;    break;
902     case X86::SBB16ri:    NewOpc = X86::SBB16i16;  break;
903     case X86::SBB32ri:    NewOpc = X86::SBB32i32;  break;
904     case X86::SBB64ri32:  NewOpc = X86::SBB64i32;  break;
905     case X86::SUB8ri:     NewOpc = X86::SUB8i8;    break;
906     case X86::SUB16ri:    NewOpc = X86::SUB16i16;  break;
907     case X86::SUB32ri:    NewOpc = X86::SUB32i32;  break;
908     case X86::SUB64ri32:  NewOpc = X86::SUB64i32;  break;
909     case X86::TEST8ri:    NewOpc = X86::TEST8i8;   break;
910     case X86::TEST16ri:   NewOpc = X86::TEST16i16; break;
911     case X86::TEST32ri:   NewOpc = X86::TEST32i32; break;
912     case X86::TEST64ri32: NewOpc = X86::TEST64i32; break;
913     case X86::XOR8ri:     NewOpc = X86::XOR8i8;    break;
914     case X86::XOR16ri:    NewOpc = X86::XOR16i16;  break;
915     case X86::XOR32ri:    NewOpc = X86::XOR32i32;  break;
916     case X86::XOR64ri32:  NewOpc = X86::XOR64i32;  break;
917     }
918     SimplifyShortImmForm(OutMI, NewOpc);
919     break;
920   }
921 
922   // Try to shrink some forms of movsx.
923   case X86::MOVSX16rr8:
924   case X86::MOVSX32rr16:
925   case X86::MOVSX64rr32:
926     SimplifyMOVSX(OutMI);
927     break;
928 
929   case X86::VCMPPDrri:
930   case X86::VCMPPDYrri:
931   case X86::VCMPPSrri:
932   case X86::VCMPPSYrri:
933   case X86::VCMPSDrr:
934   case X86::VCMPSSrr: {
935     // Swap the operands if it will enable a 2 byte VEX encoding.
936     // FIXME: Change the immediate to improve opportunities?
937     if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg()) &&
938         X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) {
939       unsigned Imm = MI->getOperand(3).getImm() & 0x7;
940       switch (Imm) {
941       default: break;
942       case 0x00: // EQUAL
943       case 0x03: // UNORDERED
944       case 0x04: // NOT EQUAL
945       case 0x07: // ORDERED
946         std::swap(OutMI.getOperand(1), OutMI.getOperand(2));
947         break;
948       }
949     }
950     break;
951   }
952 
953   case X86::VMOVHLPSrr:
954   case X86::VUNPCKHPDrr:
955     // These are not truly commutable so hide them from the default case.
956     break;
957 
958   default: {
959     // If the instruction is a commutable arithmetic instruction we might be
960     // able to commute the operands to get a 2 byte VEX prefix.
961     uint64_t TSFlags = MI->getDesc().TSFlags;
962     if (MI->getDesc().isCommutable() &&
963         (TSFlags & X86II::EncodingMask) == X86II::VEX &&
964         (TSFlags & X86II::OpMapMask) == X86II::TB &&
965         (TSFlags & X86II::FormMask) == X86II::MRMSrcReg &&
966         !(TSFlags & X86II::VEX_W) && (TSFlags & X86II::VEX_4V) &&
967         OutMI.getNumOperands() == 3) {
968       if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg()) &&
969           X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg()))
970         std::swap(OutMI.getOperand(1), OutMI.getOperand(2));
971     }
972     break;
973   }
974   }
975 }
976 
LowerTlsAddr(X86MCInstLower & MCInstLowering,const MachineInstr & MI)977 void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
978                                  const MachineInstr &MI) {
979   NoAutoPaddingScope NoPadScope(*OutStreamer);
980   bool Is64Bits = MI.getOpcode() == X86::TLS_addr64 ||
981                   MI.getOpcode() == X86::TLS_base_addr64;
982   MCContext &Ctx = OutStreamer->getContext();
983 
984   MCSymbolRefExpr::VariantKind SRVK;
985   switch (MI.getOpcode()) {
986   case X86::TLS_addr32:
987   case X86::TLS_addr64:
988     SRVK = MCSymbolRefExpr::VK_TLSGD;
989     break;
990   case X86::TLS_base_addr32:
991     SRVK = MCSymbolRefExpr::VK_TLSLDM;
992     break;
993   case X86::TLS_base_addr64:
994     SRVK = MCSymbolRefExpr::VK_TLSLD;
995     break;
996   default:
997     llvm_unreachable("unexpected opcode");
998   }
999 
1000   const MCSymbolRefExpr *Sym = MCSymbolRefExpr::create(
1001       MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)), SRVK, Ctx);
1002 
1003   // As of binutils 2.32, ld has a bogus TLS relaxation error when the GD/LD
1004   // code sequence using R_X86_64_GOTPCREL (instead of R_X86_64_GOTPCRELX) is
1005   // attempted to be relaxed to IE/LE (binutils PR24784). Work around the bug by
1006   // only using GOT when GOTPCRELX is enabled.
1007   // TODO Delete the workaround when GOTPCRELX becomes commonplace.
1008   bool UseGot = MMI->getModule()->getRtLibUseGOT() &&
1009                 Ctx.getAsmInfo()->canRelaxRelocations();
1010 
1011   if (Is64Bits) {
1012     bool NeedsPadding = SRVK == MCSymbolRefExpr::VK_TLSGD;
1013     if (NeedsPadding)
1014       EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
1015     EmitAndCountInstruction(MCInstBuilder(X86::LEA64r)
1016                                 .addReg(X86::RDI)
1017                                 .addReg(X86::RIP)
1018                                 .addImm(1)
1019                                 .addReg(0)
1020                                 .addExpr(Sym)
1021                                 .addReg(0));
1022     const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("__tls_get_addr");
1023     if (NeedsPadding) {
1024       if (!UseGot)
1025         EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
1026       EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
1027       EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX));
1028     }
1029     if (UseGot) {
1030       const MCExpr *Expr = MCSymbolRefExpr::create(
1031           TlsGetAddr, MCSymbolRefExpr::VK_GOTPCREL, Ctx);
1032       EmitAndCountInstruction(MCInstBuilder(X86::CALL64m)
1033                                   .addReg(X86::RIP)
1034                                   .addImm(1)
1035                                   .addReg(0)
1036                                   .addExpr(Expr)
1037                                   .addReg(0));
1038     } else {
1039       EmitAndCountInstruction(
1040           MCInstBuilder(X86::CALL64pcrel32)
1041               .addExpr(MCSymbolRefExpr::create(TlsGetAddr,
1042                                                MCSymbolRefExpr::VK_PLT, Ctx)));
1043     }
1044   } else {
1045     if (SRVK == MCSymbolRefExpr::VK_TLSGD && !UseGot) {
1046       EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
1047                                   .addReg(X86::EAX)
1048                                   .addReg(0)
1049                                   .addImm(1)
1050                                   .addReg(X86::EBX)
1051                                   .addExpr(Sym)
1052                                   .addReg(0));
1053     } else {
1054       EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
1055                                   .addReg(X86::EAX)
1056                                   .addReg(X86::EBX)
1057                                   .addImm(1)
1058                                   .addReg(0)
1059                                   .addExpr(Sym)
1060                                   .addReg(0));
1061     }
1062 
1063     const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("___tls_get_addr");
1064     if (UseGot) {
1065       const MCExpr *Expr =
1066           MCSymbolRefExpr::create(TlsGetAddr, MCSymbolRefExpr::VK_GOT, Ctx);
1067       EmitAndCountInstruction(MCInstBuilder(X86::CALL32m)
1068                                   .addReg(X86::EBX)
1069                                   .addImm(1)
1070                                   .addReg(0)
1071                                   .addExpr(Expr)
1072                                   .addReg(0));
1073     } else {
1074       EmitAndCountInstruction(
1075           MCInstBuilder(X86::CALLpcrel32)
1076               .addExpr(MCSymbolRefExpr::create(TlsGetAddr,
1077                                                MCSymbolRefExpr::VK_PLT, Ctx)));
1078     }
1079   }
1080 }
1081 
1082 /// Return the longest nop which can be efficiently decoded for the given
1083 /// target cpu.  15-bytes is the longest single NOP instruction, but some
1084 /// platforms can't decode the longest forms efficiently.
maxLongNopLength(const X86Subtarget * Subtarget)1085 static unsigned maxLongNopLength(const X86Subtarget *Subtarget) {
1086   if (Subtarget->getFeatureBits()[X86::ProcIntelSLM])
1087     return 7;
1088   if (Subtarget->getFeatureBits()[X86::FeatureFast15ByteNOP])
1089     return 15;
1090   if (Subtarget->getFeatureBits()[X86::FeatureFast11ByteNOP])
1091     return 11;
1092   if (Subtarget->getFeatureBits()[X86::FeatureNOPL] || Subtarget->is64Bit())
1093     return 10;
1094   if (Subtarget->is32Bit())
1095     return 2;
1096   return 1;
1097 }
1098 
1099 /// Emit the largest nop instruction smaller than or equal to \p NumBytes
1100 /// bytes.  Return the size of nop emitted.
emitNop(MCStreamer & OS,unsigned NumBytes,const X86Subtarget * Subtarget)1101 static unsigned emitNop(MCStreamer &OS, unsigned NumBytes,
1102                         const X86Subtarget *Subtarget) {
1103   // Cap a single nop emission at the profitable value for the target
1104   NumBytes = std::min(NumBytes, maxLongNopLength(Subtarget));
1105 
1106   unsigned NopSize;
1107   unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg;
1108   IndexReg = Displacement = SegmentReg = 0;
1109   BaseReg = X86::RAX;
1110   ScaleVal = 1;
1111   switch (NumBytes) {
1112   case 0:
1113     llvm_unreachable("Zero nops?");
1114     break;
1115   case 1:
1116     NopSize = 1;
1117     Opc = X86::NOOP;
1118     break;
1119   case 2:
1120     NopSize = 2;
1121     Opc = X86::XCHG16ar;
1122     break;
1123   case 3:
1124     NopSize = 3;
1125     Opc = X86::NOOPL;
1126     break;
1127   case 4:
1128     NopSize = 4;
1129     Opc = X86::NOOPL;
1130     Displacement = 8;
1131     break;
1132   case 5:
1133     NopSize = 5;
1134     Opc = X86::NOOPL;
1135     Displacement = 8;
1136     IndexReg = X86::RAX;
1137     break;
1138   case 6:
1139     NopSize = 6;
1140     Opc = X86::NOOPW;
1141     Displacement = 8;
1142     IndexReg = X86::RAX;
1143     break;
1144   case 7:
1145     NopSize = 7;
1146     Opc = X86::NOOPL;
1147     Displacement = 512;
1148     break;
1149   case 8:
1150     NopSize = 8;
1151     Opc = X86::NOOPL;
1152     Displacement = 512;
1153     IndexReg = X86::RAX;
1154     break;
1155   case 9:
1156     NopSize = 9;
1157     Opc = X86::NOOPW;
1158     Displacement = 512;
1159     IndexReg = X86::RAX;
1160     break;
1161   default:
1162     NopSize = 10;
1163     Opc = X86::NOOPW;
1164     Displacement = 512;
1165     IndexReg = X86::RAX;
1166     SegmentReg = X86::CS;
1167     break;
1168   }
1169 
1170   unsigned NumPrefixes = std::min(NumBytes - NopSize, 5U);
1171   NopSize += NumPrefixes;
1172   for (unsigned i = 0; i != NumPrefixes; ++i)
1173     OS.emitBytes("\x66");
1174 
1175   switch (Opc) {
1176   default: llvm_unreachable("Unexpected opcode");
1177   case X86::NOOP:
1178     OS.emitInstruction(MCInstBuilder(Opc), *Subtarget);
1179     break;
1180   case X86::XCHG16ar:
1181     OS.emitInstruction(MCInstBuilder(Opc).addReg(X86::AX).addReg(X86::AX),
1182                        *Subtarget);
1183     break;
1184   case X86::NOOPL:
1185   case X86::NOOPW:
1186     OS.emitInstruction(MCInstBuilder(Opc)
1187                            .addReg(BaseReg)
1188                            .addImm(ScaleVal)
1189                            .addReg(IndexReg)
1190                            .addImm(Displacement)
1191                            .addReg(SegmentReg),
1192                        *Subtarget);
1193     break;
1194   }
1195   assert(NopSize <= NumBytes && "We overemitted?");
1196   return NopSize;
1197 }
1198 
1199 /// Emit the optimal amount of multi-byte nops on X86.
emitX86Nops(MCStreamer & OS,unsigned NumBytes,const X86Subtarget * Subtarget)1200 static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
1201                         const X86Subtarget *Subtarget) {
1202   unsigned NopsToEmit = NumBytes;
1203   (void)NopsToEmit;
1204   while (NumBytes) {
1205     NumBytes -= emitNop(OS, NumBytes, Subtarget);
1206     assert(NopsToEmit >= NumBytes && "Emitted more than I asked for!");
1207   }
1208 }
1209 
LowerSTATEPOINT(const MachineInstr & MI,X86MCInstLower & MCIL)1210 void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
1211                                     X86MCInstLower &MCIL) {
1212   assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64");
1213 
1214   NoAutoPaddingScope NoPadScope(*OutStreamer);
1215 
1216   StatepointOpers SOpers(&MI);
1217   if (unsigned PatchBytes = SOpers.getNumPatchBytes()) {
1218     emitX86Nops(*OutStreamer, PatchBytes, Subtarget);
1219   } else {
1220     // Lower call target and choose correct opcode
1221     const MachineOperand &CallTarget = SOpers.getCallTarget();
1222     MCOperand CallTargetMCOp;
1223     unsigned CallOpcode;
1224     switch (CallTarget.getType()) {
1225     case MachineOperand::MO_GlobalAddress:
1226     case MachineOperand::MO_ExternalSymbol:
1227       CallTargetMCOp = MCIL.LowerSymbolOperand(
1228           CallTarget, MCIL.GetSymbolFromOperand(CallTarget));
1229       CallOpcode = X86::CALL64pcrel32;
1230       // Currently, we only support relative addressing with statepoints.
1231       // Otherwise, we'll need a scratch register to hold the target
1232       // address.  You'll fail asserts during load & relocation if this
1233       // symbol is to far away. (TODO: support non-relative addressing)
1234       break;
1235     case MachineOperand::MO_Immediate:
1236       CallTargetMCOp = MCOperand::createImm(CallTarget.getImm());
1237       CallOpcode = X86::CALL64pcrel32;
1238       // Currently, we only support relative addressing with statepoints.
1239       // Otherwise, we'll need a scratch register to hold the target
1240       // immediate.  You'll fail asserts during load & relocation if this
1241       // address is to far away. (TODO: support non-relative addressing)
1242       break;
1243     case MachineOperand::MO_Register:
1244       // FIXME: Add retpoline support and remove this.
1245       if (Subtarget->useIndirectThunkCalls())
1246         report_fatal_error("Lowering register statepoints with thunks not "
1247                            "yet implemented.");
1248       CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());
1249       CallOpcode = X86::CALL64r;
1250       break;
1251     default:
1252       llvm_unreachable("Unsupported operand type in statepoint call target");
1253       break;
1254     }
1255 
1256     // Emit call
1257     MCInst CallInst;
1258     CallInst.setOpcode(CallOpcode);
1259     CallInst.addOperand(CallTargetMCOp);
1260     OutStreamer->emitInstruction(CallInst, getSubtargetInfo());
1261   }
1262 
1263   // Record our statepoint node in the same section used by STACKMAP
1264   // and PATCHPOINT
1265   auto &Ctx = OutStreamer->getContext();
1266   MCSymbol *MILabel = Ctx.createTempSymbol();
1267   OutStreamer->emitLabel(MILabel);
1268   SM.recordStatepoint(*MILabel, MI);
1269 }
1270 
LowerFAULTING_OP(const MachineInstr & FaultingMI,X86MCInstLower & MCIL)1271 void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
1272                                      X86MCInstLower &MCIL) {
1273   // FAULTING_LOAD_OP <def>, <faltinf type>, <MBB handler>,
1274   //                  <opcode>, <operands>
1275 
1276   NoAutoPaddingScope NoPadScope(*OutStreamer);
1277 
1278   Register DefRegister = FaultingMI.getOperand(0).getReg();
1279   FaultMaps::FaultKind FK =
1280       static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm());
1281   MCSymbol *HandlerLabel = FaultingMI.getOperand(2).getMBB()->getSymbol();
1282   unsigned Opcode = FaultingMI.getOperand(3).getImm();
1283   unsigned OperandsBeginIdx = 4;
1284 
1285   auto &Ctx = OutStreamer->getContext();
1286   MCSymbol *FaultingLabel = Ctx.createTempSymbol();
1287   OutStreamer->emitLabel(FaultingLabel);
1288 
1289   assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!");
1290   FM.recordFaultingOp(FK, FaultingLabel, HandlerLabel);
1291 
1292   MCInst MI;
1293   MI.setOpcode(Opcode);
1294 
1295   if (DefRegister != X86::NoRegister)
1296     MI.addOperand(MCOperand::createReg(DefRegister));
1297 
1298   for (auto I = FaultingMI.operands_begin() + OperandsBeginIdx,
1299             E = FaultingMI.operands_end();
1300        I != E; ++I)
1301     if (auto MaybeOperand = MCIL.LowerMachineOperand(&FaultingMI, *I))
1302       MI.addOperand(MaybeOperand.getValue());
1303 
1304   OutStreamer->AddComment("on-fault: " + HandlerLabel->getName());
1305   OutStreamer->emitInstruction(MI, getSubtargetInfo());
1306 }
1307 
LowerFENTRY_CALL(const MachineInstr & MI,X86MCInstLower & MCIL)1308 void X86AsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI,
1309                                      X86MCInstLower &MCIL) {
1310   bool Is64Bits = Subtarget->is64Bit();
1311   MCContext &Ctx = OutStreamer->getContext();
1312   MCSymbol *fentry = Ctx.getOrCreateSymbol("__fentry__");
1313   const MCSymbolRefExpr *Op =
1314       MCSymbolRefExpr::create(fentry, MCSymbolRefExpr::VK_None, Ctx);
1315 
1316   EmitAndCountInstruction(
1317       MCInstBuilder(Is64Bits ? X86::CALL64pcrel32 : X86::CALLpcrel32)
1318           .addExpr(Op));
1319 }
1320 
LowerPATCHABLE_OP(const MachineInstr & MI,X86MCInstLower & MCIL)1321 void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
1322                                       X86MCInstLower &MCIL) {
1323   // PATCHABLE_OP minsize, opcode, operands
1324 
1325   NoAutoPaddingScope NoPadScope(*OutStreamer);
1326 
1327   unsigned MinSize = MI.getOperand(0).getImm();
1328   unsigned Opcode = MI.getOperand(1).getImm();
1329 
1330   MCInst MCI;
1331   MCI.setOpcode(Opcode);
1332   for (auto &MO : make_range(MI.operands_begin() + 2, MI.operands_end()))
1333     if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
1334       MCI.addOperand(MaybeOperand.getValue());
1335 
1336   SmallString<256> Code;
1337   SmallVector<MCFixup, 4> Fixups;
1338   raw_svector_ostream VecOS(Code);
1339   CodeEmitter->encodeInstruction(MCI, VecOS, Fixups, getSubtargetInfo());
1340 
1341   if (Code.size() < MinSize) {
1342     if (MinSize == 2 && Subtarget->is32Bit() &&
1343         Subtarget->isTargetWindowsMSVC() &&
1344         (Subtarget->getCPU().empty() || Subtarget->getCPU() == "pentium3")) {
1345       // For compatibilty reasons, when targetting MSVC, is is important to
1346       // generate a 'legacy' NOP in the form of a 8B FF MOV EDI, EDI. Some tools
1347       // rely specifically on this pattern to be able to patch a function.
1348       // This is only for 32-bit targets, when using /arch:IA32 or /arch:SSE.
1349       OutStreamer->emitInstruction(
1350           MCInstBuilder(X86::MOV32rr_REV).addReg(X86::EDI).addReg(X86::EDI),
1351           *Subtarget);
1352     } else if (MinSize == 2 && Opcode == X86::PUSH64r) {
1353       // This is an optimization that lets us get away without emitting a nop in
1354       // many cases.
1355       //
1356       // NB! In some cases the encoding for PUSH64r (e.g. PUSH64r %r9) takes two
1357       // bytes too, so the check on MinSize is important.
1358       MCI.setOpcode(X86::PUSH64rmr);
1359     } else {
1360       unsigned NopSize = emitNop(*OutStreamer, MinSize, Subtarget);
1361       assert(NopSize == MinSize && "Could not implement MinSize!");
1362       (void)NopSize;
1363     }
1364   }
1365 
1366   OutStreamer->emitInstruction(MCI, getSubtargetInfo());
1367 }
1368 
1369 // Lower a stackmap of the form:
1370 // <id>, <shadowBytes>, ...
LowerSTACKMAP(const MachineInstr & MI)1371 void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
1372   SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
1373 
1374   auto &Ctx = OutStreamer->getContext();
1375   MCSymbol *MILabel = Ctx.createTempSymbol();
1376   OutStreamer->emitLabel(MILabel);
1377 
1378   SM.recordStackMap(*MILabel, MI);
1379   unsigned NumShadowBytes = MI.getOperand(1).getImm();
1380   SMShadowTracker.reset(NumShadowBytes);
1381 }
1382 
1383 // Lower a patchpoint of the form:
1384 // [<def>], <id>, <numBytes>, <target>, <numArgs>, <cc>, ...
LowerPATCHPOINT(const MachineInstr & MI,X86MCInstLower & MCIL)1385 void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
1386                                     X86MCInstLower &MCIL) {
1387   assert(Subtarget->is64Bit() && "Patchpoint currently only supports X86-64");
1388 
1389   SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
1390 
1391   NoAutoPaddingScope NoPadScope(*OutStreamer);
1392 
1393   auto &Ctx = OutStreamer->getContext();
1394   MCSymbol *MILabel = Ctx.createTempSymbol();
1395   OutStreamer->emitLabel(MILabel);
1396   SM.recordPatchPoint(*MILabel, MI);
1397 
1398   PatchPointOpers opers(&MI);
1399   unsigned ScratchIdx = opers.getNextScratchIdx();
1400   unsigned EncodedBytes = 0;
1401   const MachineOperand &CalleeMO = opers.getCallTarget();
1402 
1403   // Check for null target. If target is non-null (i.e. is non-zero or is
1404   // symbolic) then emit a call.
1405   if (!(CalleeMO.isImm() && !CalleeMO.getImm())) {
1406     MCOperand CalleeMCOp;
1407     switch (CalleeMO.getType()) {
1408     default:
1409       /// FIXME: Add a verifier check for bad callee types.
1410       llvm_unreachable("Unrecognized callee operand type.");
1411     case MachineOperand::MO_Immediate:
1412       if (CalleeMO.getImm())
1413         CalleeMCOp = MCOperand::createImm(CalleeMO.getImm());
1414       break;
1415     case MachineOperand::MO_ExternalSymbol:
1416     case MachineOperand::MO_GlobalAddress:
1417       CalleeMCOp = MCIL.LowerSymbolOperand(CalleeMO,
1418                                            MCIL.GetSymbolFromOperand(CalleeMO));
1419       break;
1420     }
1421 
1422     // Emit MOV to materialize the target address and the CALL to target.
1423     // This is encoded with 12-13 bytes, depending on which register is used.
1424     Register ScratchReg = MI.getOperand(ScratchIdx).getReg();
1425     if (X86II::isX86_64ExtendedReg(ScratchReg))
1426       EncodedBytes = 13;
1427     else
1428       EncodedBytes = 12;
1429 
1430     EmitAndCountInstruction(
1431         MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp));
1432     // FIXME: Add retpoline support and remove this.
1433     if (Subtarget->useIndirectThunkCalls())
1434       report_fatal_error(
1435           "Lowering patchpoint with thunks not yet implemented.");
1436     EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg));
1437   }
1438 
1439   // Emit padding.
1440   unsigned NumBytes = opers.getNumPatchBytes();
1441   assert(NumBytes >= EncodedBytes &&
1442          "Patchpoint can't request size less than the length of a call.");
1443 
1444   emitX86Nops(*OutStreamer, NumBytes - EncodedBytes, Subtarget);
1445 }
1446 
LowerPATCHABLE_EVENT_CALL(const MachineInstr & MI,X86MCInstLower & MCIL)1447 void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
1448                                               X86MCInstLower &MCIL) {
1449   assert(Subtarget->is64Bit() && "XRay custom events only supports X86-64");
1450 
1451   NoAutoPaddingScope NoPadScope(*OutStreamer);
1452 
1453   // We want to emit the following pattern, which follows the x86 calling
1454   // convention to prepare for the trampoline call to be patched in.
1455   //
1456   //   .p2align 1, ...
1457   // .Lxray_event_sled_N:
1458   //   jmp +N                        // jump across the instrumentation sled
1459   //   ...                           // set up arguments in register
1460   //   callq __xray_CustomEvent@plt  // force dependency to symbol
1461   //   ...
1462   //   <jump here>
1463   //
1464   // After patching, it would look something like:
1465   //
1466   //   nopw (2-byte nop)
1467   //   ...
1468   //   callq __xrayCustomEvent  // already lowered
1469   //   ...
1470   //
1471   // ---
1472   // First we emit the label and the jump.
1473   auto CurSled = OutContext.createTempSymbol("xray_event_sled_", true);
1474   OutStreamer->AddComment("# XRay Custom Event Log");
1475   OutStreamer->emitCodeAlignment(2);
1476   OutStreamer->emitLabel(CurSled);
1477 
1478   // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1479   // an operand (computed as an offset from the jmp instruction).
1480   // FIXME: Find another less hacky way do force the relative jump.
1481   OutStreamer->emitBinaryData("\xeb\x0f");
1482 
1483   // The default C calling convention will place two arguments into %rcx and
1484   // %rdx -- so we only work with those.
1485   const Register DestRegs[] = {X86::RDI, X86::RSI};
1486   bool UsedMask[] = {false, false};
1487   // Filled out in loop.
1488   Register SrcRegs[] = {0, 0};
1489 
1490   // Then we put the operands in the %rdi and %rsi registers. We spill the
1491   // values in the register before we clobber them, and mark them as used in
1492   // UsedMask. In case the arguments are already in the correct register, we use
1493   // emit nops appropriately sized to keep the sled the same size in every
1494   // situation.
1495   for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1496     if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
1497       assert(Op->isReg() && "Only support arguments in registers");
1498       SrcRegs[I] = getX86SubSuperRegister(Op->getReg(), 64);
1499       if (SrcRegs[I] != DestRegs[I]) {
1500         UsedMask[I] = true;
1501         EmitAndCountInstruction(
1502             MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
1503       } else {
1504         emitX86Nops(*OutStreamer, 4, Subtarget);
1505       }
1506     }
1507 
1508   // Now that the register values are stashed, mov arguments into place.
1509   // FIXME: This doesn't work if one of the later SrcRegs is equal to an
1510   // earlier DestReg. We will have already overwritten over the register before
1511   // we can copy from it.
1512   for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1513     if (SrcRegs[I] != DestRegs[I])
1514       EmitAndCountInstruction(
1515           MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));
1516 
1517   // We emit a hard dependency on the __xray_CustomEvent symbol, which is the
1518   // name of the trampoline to be implemented by the XRay runtime.
1519   auto TSym = OutContext.getOrCreateSymbol("__xray_CustomEvent");
1520   MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
1521   if (isPositionIndependent())
1522     TOp.setTargetFlags(X86II::MO_PLT);
1523 
1524   // Emit the call instruction.
1525   EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
1526                               .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));
1527 
1528   // Restore caller-saved and used registers.
1529   for (unsigned I = sizeof UsedMask; I-- > 0;)
1530     if (UsedMask[I])
1531       EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
1532     else
1533       emitX86Nops(*OutStreamer, 1, Subtarget);
1534 
1535   OutStreamer->AddComment("xray custom event end.");
1536 
1537   // Record the sled version. Version 0 of this sled was spelled differently, so
1538   // we let the runtime handle the different offsets we're using. Version 2
1539   // changed the absolute address to a PC-relative address.
1540   recordSled(CurSled, MI, SledKind::CUSTOM_EVENT, 2);
1541 }
1542 
LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr & MI,X86MCInstLower & MCIL)1543 void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
1544                                                     X86MCInstLower &MCIL) {
1545   assert(Subtarget->is64Bit() && "XRay typed events only supports X86-64");
1546 
1547   NoAutoPaddingScope NoPadScope(*OutStreamer);
1548 
1549   // We want to emit the following pattern, which follows the x86 calling
1550   // convention to prepare for the trampoline call to be patched in.
1551   //
1552   //   .p2align 1, ...
1553   // .Lxray_event_sled_N:
1554   //   jmp +N                        // jump across the instrumentation sled
1555   //   ...                           // set up arguments in register
1556   //   callq __xray_TypedEvent@plt  // force dependency to symbol
1557   //   ...
1558   //   <jump here>
1559   //
1560   // After patching, it would look something like:
1561   //
1562   //   nopw (2-byte nop)
1563   //   ...
1564   //   callq __xrayTypedEvent  // already lowered
1565   //   ...
1566   //
1567   // ---
1568   // First we emit the label and the jump.
1569   auto CurSled = OutContext.createTempSymbol("xray_typed_event_sled_", true);
1570   OutStreamer->AddComment("# XRay Typed Event Log");
1571   OutStreamer->emitCodeAlignment(2);
1572   OutStreamer->emitLabel(CurSled);
1573 
1574   // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1575   // an operand (computed as an offset from the jmp instruction).
1576   // FIXME: Find another less hacky way do force the relative jump.
1577   OutStreamer->emitBinaryData("\xeb\x14");
1578 
1579   // An x86-64 convention may place three arguments into %rcx, %rdx, and R8,
1580   // so we'll work with those. Or we may be called via SystemV, in which case
1581   // we don't have to do any translation.
1582   const Register DestRegs[] = {X86::RDI, X86::RSI, X86::RDX};
1583   bool UsedMask[] = {false, false, false};
1584 
1585   // Will fill out src regs in the loop.
1586   Register SrcRegs[] = {0, 0, 0};
1587 
1588   // Then we put the operands in the SystemV registers. We spill the values in
1589   // the registers before we clobber them, and mark them as used in UsedMask.
1590   // In case the arguments are already in the correct register, we emit nops
1591   // appropriately sized to keep the sled the same size in every situation.
1592   for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1593     if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
1594       // TODO: Is register only support adequate?
1595       assert(Op->isReg() && "Only supports arguments in registers");
1596       SrcRegs[I] = getX86SubSuperRegister(Op->getReg(), 64);
1597       if (SrcRegs[I] != DestRegs[I]) {
1598         UsedMask[I] = true;
1599         EmitAndCountInstruction(
1600             MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
1601       } else {
1602         emitX86Nops(*OutStreamer, 4, Subtarget);
1603       }
1604     }
1605 
1606   // In the above loop we only stash all of the destination registers or emit
1607   // nops if the arguments are already in the right place. Doing the actually
1608   // moving is postponed until after all the registers are stashed so nothing
1609   // is clobbers. We've already added nops to account for the size of mov and
1610   // push if the register is in the right place, so we only have to worry about
1611   // emitting movs.
1612   // FIXME: This doesn't work if one of the later SrcRegs is equal to an
1613   // earlier DestReg. We will have already overwritten over the register before
1614   // we can copy from it.
1615   for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1616     if (UsedMask[I])
1617       EmitAndCountInstruction(
1618           MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));
1619 
1620   // We emit a hard dependency on the __xray_TypedEvent symbol, which is the
1621   // name of the trampoline to be implemented by the XRay runtime.
1622   auto TSym = OutContext.getOrCreateSymbol("__xray_TypedEvent");
1623   MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
1624   if (isPositionIndependent())
1625     TOp.setTargetFlags(X86II::MO_PLT);
1626 
1627   // Emit the call instruction.
1628   EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
1629                               .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));
1630 
1631   // Restore caller-saved and used registers.
1632   for (unsigned I = sizeof UsedMask; I-- > 0;)
1633     if (UsedMask[I])
1634       EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
1635     else
1636       emitX86Nops(*OutStreamer, 1, Subtarget);
1637 
1638   OutStreamer->AddComment("xray typed event end.");
1639 
1640   // Record the sled version.
1641   recordSled(CurSled, MI, SledKind::TYPED_EVENT, 2);
1642 }
1643 
LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr & MI,X86MCInstLower & MCIL)1644 void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
1645                                                   X86MCInstLower &MCIL) {
1646 
1647   NoAutoPaddingScope NoPadScope(*OutStreamer);
1648 
1649   const Function &F = MF->getFunction();
1650   if (F.hasFnAttribute("patchable-function-entry")) {
1651     unsigned Num;
1652     if (F.getFnAttribute("patchable-function-entry")
1653             .getValueAsString()
1654             .getAsInteger(10, Num))
1655       return;
1656     emitX86Nops(*OutStreamer, Num, Subtarget);
1657     return;
1658   }
1659   // We want to emit the following pattern:
1660   //
1661   //   .p2align 1, ...
1662   // .Lxray_sled_N:
1663   //   jmp .tmpN
1664   //   # 9 bytes worth of noops
1665   //
1666   // We need the 9 bytes because at runtime, we'd be patching over the full 11
1667   // bytes with the following pattern:
1668   //
1669   //   mov %r10, <function id, 32-bit>   // 6 bytes
1670   //   call <relative offset, 32-bits>   // 5 bytes
1671   //
1672   auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
1673   OutStreamer->emitCodeAlignment(2);
1674   OutStreamer->emitLabel(CurSled);
1675 
1676   // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1677   // an operand (computed as an offset from the jmp instruction).
1678   // FIXME: Find another less hacky way do force the relative jump.
1679   OutStreamer->emitBytes("\xeb\x09");
1680   emitX86Nops(*OutStreamer, 9, Subtarget);
1681   recordSled(CurSled, MI, SledKind::FUNCTION_ENTER, 2);
1682 }
1683 
LowerPATCHABLE_RET(const MachineInstr & MI,X86MCInstLower & MCIL)1684 void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
1685                                        X86MCInstLower &MCIL) {
1686   NoAutoPaddingScope NoPadScope(*OutStreamer);
1687 
1688   // Since PATCHABLE_RET takes the opcode of the return statement as an
1689   // argument, we use that to emit the correct form of the RET that we want.
1690   // i.e. when we see this:
1691   //
1692   //   PATCHABLE_RET X86::RET ...
1693   //
1694   // We should emit the RET followed by sleds.
1695   //
1696   //   .p2align 1, ...
1697   // .Lxray_sled_N:
1698   //   ret  # or equivalent instruction
1699   //   # 10 bytes worth of noops
1700   //
1701   // This just makes sure that the alignment for the next instruction is 2.
1702   auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
1703   OutStreamer->emitCodeAlignment(2);
1704   OutStreamer->emitLabel(CurSled);
1705   unsigned OpCode = MI.getOperand(0).getImm();
1706   MCInst Ret;
1707   Ret.setOpcode(OpCode);
1708   for (auto &MO : make_range(MI.operands_begin() + 1, MI.operands_end()))
1709     if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
1710       Ret.addOperand(MaybeOperand.getValue());
1711   OutStreamer->emitInstruction(Ret, getSubtargetInfo());
1712   emitX86Nops(*OutStreamer, 10, Subtarget);
1713   recordSled(CurSled, MI, SledKind::FUNCTION_EXIT, 2);
1714 }
1715 
LowerPATCHABLE_TAIL_CALL(const MachineInstr & MI,X86MCInstLower & MCIL)1716 void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
1717                                              X86MCInstLower &MCIL) {
1718   NoAutoPaddingScope NoPadScope(*OutStreamer);
1719 
1720   // Like PATCHABLE_RET, we have the actual instruction in the operands to this
1721   // instruction so we lower that particular instruction and its operands.
1722   // Unlike PATCHABLE_RET though, we put the sled before the JMP, much like how
1723   // we do it for PATCHABLE_FUNCTION_ENTER. The sled should be very similar to
1724   // the PATCHABLE_FUNCTION_ENTER case, followed by the lowering of the actual
1725   // tail call much like how we have it in PATCHABLE_RET.
1726   auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
1727   OutStreamer->emitCodeAlignment(2);
1728   OutStreamer->emitLabel(CurSled);
1729   auto Target = OutContext.createTempSymbol();
1730 
1731   // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1732   // an operand (computed as an offset from the jmp instruction).
1733   // FIXME: Find another less hacky way do force the relative jump.
1734   OutStreamer->emitBytes("\xeb\x09");
1735   emitX86Nops(*OutStreamer, 9, Subtarget);
1736   OutStreamer->emitLabel(Target);
1737   recordSled(CurSled, MI, SledKind::TAIL_CALL, 2);
1738 
1739   unsigned OpCode = MI.getOperand(0).getImm();
1740   OpCode = convertTailJumpOpcode(OpCode);
1741   MCInst TC;
1742   TC.setOpcode(OpCode);
1743 
1744   // Before emitting the instruction, add a comment to indicate that this is
1745   // indeed a tail call.
1746   OutStreamer->AddComment("TAILCALL");
1747   for (auto &MO : make_range(MI.operands_begin() + 1, MI.operands_end()))
1748     if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
1749       TC.addOperand(MaybeOperand.getValue());
1750   OutStreamer->emitInstruction(TC, getSubtargetInfo());
1751 }
1752 
1753 // Returns instruction preceding MBBI in MachineFunction.
1754 // If MBBI is the first instruction of the first basic block, returns null.
1755 static MachineBasicBlock::const_iterator
PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI)1756 PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI) {
1757   const MachineBasicBlock *MBB = MBBI->getParent();
1758   while (MBBI == MBB->begin()) {
1759     if (MBB == &MBB->getParent()->front())
1760       return MachineBasicBlock::const_iterator();
1761     MBB = MBB->getPrevNode();
1762     MBBI = MBB->end();
1763   }
1764   --MBBI;
1765   return MBBI;
1766 }
1767 
getConstantFromPool(const MachineInstr & MI,const MachineOperand & Op)1768 static const Constant *getConstantFromPool(const MachineInstr &MI,
1769                                            const MachineOperand &Op) {
1770   if (!Op.isCPI() || Op.getOffset() != 0)
1771     return nullptr;
1772 
1773   ArrayRef<MachineConstantPoolEntry> Constants =
1774       MI.getParent()->getParent()->getConstantPool()->getConstants();
1775   const MachineConstantPoolEntry &ConstantEntry = Constants[Op.getIndex()];
1776 
1777   // Bail if this is a machine constant pool entry, we won't be able to dig out
1778   // anything useful.
1779   if (ConstantEntry.isMachineConstantPoolEntry())
1780     return nullptr;
1781 
1782   const Constant *C = ConstantEntry.Val.ConstVal;
1783   assert((!C || ConstantEntry.getType() == C->getType()) &&
1784          "Expected a constant of the same type!");
1785   return C;
1786 }
1787 
getShuffleComment(const MachineInstr * MI,unsigned SrcOp1Idx,unsigned SrcOp2Idx,ArrayRef<int> Mask)1788 static std::string getShuffleComment(const MachineInstr *MI, unsigned SrcOp1Idx,
1789                                      unsigned SrcOp2Idx, ArrayRef<int> Mask) {
1790   std::string Comment;
1791 
1792   // Compute the name for a register. This is really goofy because we have
1793   // multiple instruction printers that could (in theory) use different
1794   // names. Fortunately most people use the ATT style (outside of Windows)
1795   // and they actually agree on register naming here. Ultimately, this is
1796   // a comment, and so its OK if it isn't perfect.
1797   auto GetRegisterName = [](unsigned RegNum) -> StringRef {
1798     return X86ATTInstPrinter::getRegisterName(RegNum);
1799   };
1800 
1801   const MachineOperand &DstOp = MI->getOperand(0);
1802   const MachineOperand &SrcOp1 = MI->getOperand(SrcOp1Idx);
1803   const MachineOperand &SrcOp2 = MI->getOperand(SrcOp2Idx);
1804 
1805   StringRef DstName = DstOp.isReg() ? GetRegisterName(DstOp.getReg()) : "mem";
1806   StringRef Src1Name =
1807       SrcOp1.isReg() ? GetRegisterName(SrcOp1.getReg()) : "mem";
1808   StringRef Src2Name =
1809       SrcOp2.isReg() ? GetRegisterName(SrcOp2.getReg()) : "mem";
1810 
1811   // One source operand, fix the mask to print all elements in one span.
1812   SmallVector<int, 8> ShuffleMask(Mask.begin(), Mask.end());
1813   if (Src1Name == Src2Name)
1814     for (int i = 0, e = ShuffleMask.size(); i != e; ++i)
1815       if (ShuffleMask[i] >= e)
1816         ShuffleMask[i] -= e;
1817 
1818   raw_string_ostream CS(Comment);
1819   CS << DstName;
1820 
1821   // Handle AVX512 MASK/MASXZ write mask comments.
1822   // MASK: zmmX {%kY}
1823   // MASKZ: zmmX {%kY} {z}
1824   if (SrcOp1Idx > 1) {
1825     assert((SrcOp1Idx == 2 || SrcOp1Idx == 3) && "Unexpected writemask");
1826 
1827     const MachineOperand &WriteMaskOp = MI->getOperand(SrcOp1Idx - 1);
1828     if (WriteMaskOp.isReg()) {
1829       CS << " {%" << GetRegisterName(WriteMaskOp.getReg()) << "}";
1830 
1831       if (SrcOp1Idx == 2) {
1832         CS << " {z}";
1833       }
1834     }
1835   }
1836 
1837   CS << " = ";
1838 
1839   for (int i = 0, e = ShuffleMask.size(); i != e; ++i) {
1840     if (i != 0)
1841       CS << ",";
1842     if (ShuffleMask[i] == SM_SentinelZero) {
1843       CS << "zero";
1844       continue;
1845     }
1846 
1847     // Otherwise, it must come from src1 or src2.  Print the span of elements
1848     // that comes from this src.
1849     bool isSrc1 = ShuffleMask[i] < (int)e;
1850     CS << (isSrc1 ? Src1Name : Src2Name) << '[';
1851 
1852     bool IsFirst = true;
1853     while (i != e && ShuffleMask[i] != SM_SentinelZero &&
1854            (ShuffleMask[i] < (int)e) == isSrc1) {
1855       if (!IsFirst)
1856         CS << ',';
1857       else
1858         IsFirst = false;
1859       if (ShuffleMask[i] == SM_SentinelUndef)
1860         CS << "u";
1861       else
1862         CS << ShuffleMask[i] % (int)e;
1863       ++i;
1864     }
1865     CS << ']';
1866     --i; // For loop increments element #.
1867   }
1868   CS.flush();
1869 
1870   return Comment;
1871 }
1872 
printConstant(const APInt & Val,raw_ostream & CS)1873 static void printConstant(const APInt &Val, raw_ostream &CS) {
1874   if (Val.getBitWidth() <= 64) {
1875     CS << Val.getZExtValue();
1876   } else {
1877     // print multi-word constant as (w0,w1)
1878     CS << "(";
1879     for (int i = 0, N = Val.getNumWords(); i < N; ++i) {
1880       if (i > 0)
1881         CS << ",";
1882       CS << Val.getRawData()[i];
1883     }
1884     CS << ")";
1885   }
1886 }
1887 
printConstant(const APFloat & Flt,raw_ostream & CS)1888 static void printConstant(const APFloat &Flt, raw_ostream &CS) {
1889   SmallString<32> Str;
1890   // Force scientific notation to distinquish from integers.
1891   Flt.toString(Str, 0, 0);
1892   CS << Str;
1893 }
1894 
printConstant(const Constant * COp,raw_ostream & CS)1895 static void printConstant(const Constant *COp, raw_ostream &CS) {
1896   if (isa<UndefValue>(COp)) {
1897     CS << "u";
1898   } else if (auto *CI = dyn_cast<ConstantInt>(COp)) {
1899     printConstant(CI->getValue(), CS);
1900   } else if (auto *CF = dyn_cast<ConstantFP>(COp)) {
1901     printConstant(CF->getValueAPF(), CS);
1902   } else {
1903     CS << "?";
1904   }
1905 }
1906 
EmitSEHInstruction(const MachineInstr * MI)1907 void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) {
1908   assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
1909   assert(getSubtarget().isOSWindows() && "SEH_ instruction Windows only");
1910 
1911   // Use the .cv_fpo directives if we're emitting CodeView on 32-bit x86.
1912   if (EmitFPOData) {
1913     X86TargetStreamer *XTS =
1914         static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer());
1915     switch (MI->getOpcode()) {
1916     case X86::SEH_PushReg:
1917       XTS->emitFPOPushReg(MI->getOperand(0).getImm());
1918       break;
1919     case X86::SEH_StackAlloc:
1920       XTS->emitFPOStackAlloc(MI->getOperand(0).getImm());
1921       break;
1922     case X86::SEH_StackAlign:
1923       XTS->emitFPOStackAlign(MI->getOperand(0).getImm());
1924       break;
1925     case X86::SEH_SetFrame:
1926       assert(MI->getOperand(1).getImm() == 0 &&
1927              ".cv_fpo_setframe takes no offset");
1928       XTS->emitFPOSetFrame(MI->getOperand(0).getImm());
1929       break;
1930     case X86::SEH_EndPrologue:
1931       XTS->emitFPOEndPrologue();
1932       break;
1933     case X86::SEH_SaveReg:
1934     case X86::SEH_SaveXMM:
1935     case X86::SEH_PushFrame:
1936       llvm_unreachable("SEH_ directive incompatible with FPO");
1937       break;
1938     default:
1939       llvm_unreachable("expected SEH_ instruction");
1940     }
1941     return;
1942   }
1943 
1944   // Otherwise, use the .seh_ directives for all other Windows platforms.
1945   switch (MI->getOpcode()) {
1946   case X86::SEH_PushReg:
1947     OutStreamer->EmitWinCFIPushReg(MI->getOperand(0).getImm());
1948     break;
1949 
1950   case X86::SEH_SaveReg:
1951     OutStreamer->EmitWinCFISaveReg(MI->getOperand(0).getImm(),
1952                                    MI->getOperand(1).getImm());
1953     break;
1954 
1955   case X86::SEH_SaveXMM:
1956     OutStreamer->EmitWinCFISaveXMM(MI->getOperand(0).getImm(),
1957                                    MI->getOperand(1).getImm());
1958     break;
1959 
1960   case X86::SEH_StackAlloc:
1961     OutStreamer->EmitWinCFIAllocStack(MI->getOperand(0).getImm());
1962     break;
1963 
1964   case X86::SEH_SetFrame:
1965     OutStreamer->EmitWinCFISetFrame(MI->getOperand(0).getImm(),
1966                                     MI->getOperand(1).getImm());
1967     break;
1968 
1969   case X86::SEH_PushFrame:
1970     OutStreamer->EmitWinCFIPushFrame(MI->getOperand(0).getImm());
1971     break;
1972 
1973   case X86::SEH_EndPrologue:
1974     OutStreamer->EmitWinCFIEndProlog();
1975     break;
1976 
1977   default:
1978     llvm_unreachable("expected SEH_ instruction");
1979   }
1980 }
1981 
getRegisterWidth(const MCOperandInfo & Info)1982 static unsigned getRegisterWidth(const MCOperandInfo &Info) {
1983   if (Info.RegClass == X86::VR128RegClassID ||
1984       Info.RegClass == X86::VR128XRegClassID)
1985     return 128;
1986   if (Info.RegClass == X86::VR256RegClassID ||
1987       Info.RegClass == X86::VR256XRegClassID)
1988     return 256;
1989   if (Info.RegClass == X86::VR512RegClassID)
1990     return 512;
1991   llvm_unreachable("Unknown register class!");
1992 }
1993 
addConstantComments(const MachineInstr * MI,MCStreamer & OutStreamer)1994 static void addConstantComments(const MachineInstr *MI,
1995                                 MCStreamer &OutStreamer) {
1996   switch (MI->getOpcode()) {
1997   // Lower PSHUFB and VPERMILP normally but add a comment if we can find
1998   // a constant shuffle mask. We won't be able to do this at the MC layer
1999   // because the mask isn't an immediate.
2000   case X86::PSHUFBrm:
2001   case X86::VPSHUFBrm:
2002   case X86::VPSHUFBYrm:
2003   case X86::VPSHUFBZ128rm:
2004   case X86::VPSHUFBZ128rmk:
2005   case X86::VPSHUFBZ128rmkz:
2006   case X86::VPSHUFBZ256rm:
2007   case X86::VPSHUFBZ256rmk:
2008   case X86::VPSHUFBZ256rmkz:
2009   case X86::VPSHUFBZrm:
2010   case X86::VPSHUFBZrmk:
2011   case X86::VPSHUFBZrmkz: {
2012     unsigned SrcIdx = 1;
2013     if (X86II::isKMasked(MI->getDesc().TSFlags)) {
2014       // Skip mask operand.
2015       ++SrcIdx;
2016       if (X86II::isKMergeMasked(MI->getDesc().TSFlags)) {
2017         // Skip passthru operand.
2018         ++SrcIdx;
2019       }
2020     }
2021     unsigned MaskIdx = SrcIdx + 1 + X86::AddrDisp;
2022 
2023     assert(MI->getNumOperands() >= (SrcIdx + 1 + X86::AddrNumOperands) &&
2024            "Unexpected number of operands!");
2025 
2026     const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
2027     if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2028       unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
2029       SmallVector<int, 64> Mask;
2030       DecodePSHUFBMask(C, Width, Mask);
2031       if (!Mask.empty())
2032         OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
2033     }
2034     break;
2035   }
2036 
2037   case X86::VPERMILPSrm:
2038   case X86::VPERMILPSYrm:
2039   case X86::VPERMILPSZ128rm:
2040   case X86::VPERMILPSZ128rmk:
2041   case X86::VPERMILPSZ128rmkz:
2042   case X86::VPERMILPSZ256rm:
2043   case X86::VPERMILPSZ256rmk:
2044   case X86::VPERMILPSZ256rmkz:
2045   case X86::VPERMILPSZrm:
2046   case X86::VPERMILPSZrmk:
2047   case X86::VPERMILPSZrmkz:
2048   case X86::VPERMILPDrm:
2049   case X86::VPERMILPDYrm:
2050   case X86::VPERMILPDZ128rm:
2051   case X86::VPERMILPDZ128rmk:
2052   case X86::VPERMILPDZ128rmkz:
2053   case X86::VPERMILPDZ256rm:
2054   case X86::VPERMILPDZ256rmk:
2055   case X86::VPERMILPDZ256rmkz:
2056   case X86::VPERMILPDZrm:
2057   case X86::VPERMILPDZrmk:
2058   case X86::VPERMILPDZrmkz: {
2059     unsigned ElSize;
2060     switch (MI->getOpcode()) {
2061     default: llvm_unreachable("Invalid opcode");
2062     case X86::VPERMILPSrm:
2063     case X86::VPERMILPSYrm:
2064     case X86::VPERMILPSZ128rm:
2065     case X86::VPERMILPSZ256rm:
2066     case X86::VPERMILPSZrm:
2067     case X86::VPERMILPSZ128rmkz:
2068     case X86::VPERMILPSZ256rmkz:
2069     case X86::VPERMILPSZrmkz:
2070     case X86::VPERMILPSZ128rmk:
2071     case X86::VPERMILPSZ256rmk:
2072     case X86::VPERMILPSZrmk:
2073       ElSize = 32;
2074       break;
2075     case X86::VPERMILPDrm:
2076     case X86::VPERMILPDYrm:
2077     case X86::VPERMILPDZ128rm:
2078     case X86::VPERMILPDZ256rm:
2079     case X86::VPERMILPDZrm:
2080     case X86::VPERMILPDZ128rmkz:
2081     case X86::VPERMILPDZ256rmkz:
2082     case X86::VPERMILPDZrmkz:
2083     case X86::VPERMILPDZ128rmk:
2084     case X86::VPERMILPDZ256rmk:
2085     case X86::VPERMILPDZrmk:
2086       ElSize = 64;
2087       break;
2088     }
2089 
2090     unsigned SrcIdx = 1;
2091     if (X86II::isKMasked(MI->getDesc().TSFlags)) {
2092       // Skip mask operand.
2093       ++SrcIdx;
2094       if (X86II::isKMergeMasked(MI->getDesc().TSFlags)) {
2095         // Skip passthru operand.
2096         ++SrcIdx;
2097       }
2098     }
2099     unsigned MaskIdx = SrcIdx + 1 + X86::AddrDisp;
2100 
2101     assert(MI->getNumOperands() >= (SrcIdx + 1 + X86::AddrNumOperands) &&
2102            "Unexpected number of operands!");
2103 
2104     const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
2105     if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2106       unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
2107       SmallVector<int, 16> Mask;
2108       DecodeVPERMILPMask(C, ElSize, Width, Mask);
2109       if (!Mask.empty())
2110         OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
2111     }
2112     break;
2113   }
2114 
2115   case X86::VPERMIL2PDrm:
2116   case X86::VPERMIL2PSrm:
2117   case X86::VPERMIL2PDYrm:
2118   case X86::VPERMIL2PSYrm: {
2119     assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands + 1) &&
2120            "Unexpected number of operands!");
2121 
2122     const MachineOperand &CtrlOp = MI->getOperand(MI->getNumOperands() - 1);
2123     if (!CtrlOp.isImm())
2124       break;
2125 
2126     unsigned ElSize;
2127     switch (MI->getOpcode()) {
2128     default: llvm_unreachable("Invalid opcode");
2129     case X86::VPERMIL2PSrm: case X86::VPERMIL2PSYrm: ElSize = 32; break;
2130     case X86::VPERMIL2PDrm: case X86::VPERMIL2PDYrm: ElSize = 64; break;
2131     }
2132 
2133     const MachineOperand &MaskOp = MI->getOperand(3 + X86::AddrDisp);
2134     if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2135       unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
2136       SmallVector<int, 16> Mask;
2137       DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Width, Mask);
2138       if (!Mask.empty())
2139         OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask));
2140     }
2141     break;
2142   }
2143 
2144   case X86::VPPERMrrm: {
2145     assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands) &&
2146            "Unexpected number of operands!");
2147 
2148     const MachineOperand &MaskOp = MI->getOperand(3 + X86::AddrDisp);
2149     if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2150       unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
2151       SmallVector<int, 16> Mask;
2152       DecodeVPPERMMask(C, Width, Mask);
2153       if (!Mask.empty())
2154         OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask));
2155     }
2156     break;
2157   }
2158 
2159   case X86::MMX_MOVQ64rm: {
2160     assert(MI->getNumOperands() == (1 + X86::AddrNumOperands) &&
2161            "Unexpected number of operands!");
2162     if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
2163       std::string Comment;
2164       raw_string_ostream CS(Comment);
2165       const MachineOperand &DstOp = MI->getOperand(0);
2166       CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
2167       if (auto *CF = dyn_cast<ConstantFP>(C)) {
2168         CS << "0x" << CF->getValueAPF().bitcastToAPInt().toString(16, false);
2169         OutStreamer.AddComment(CS.str());
2170       }
2171     }
2172     break;
2173   }
2174 
2175 #define MOV_CASE(Prefix, Suffix)                                               \
2176   case X86::Prefix##MOVAPD##Suffix##rm:                                        \
2177   case X86::Prefix##MOVAPS##Suffix##rm:                                        \
2178   case X86::Prefix##MOVUPD##Suffix##rm:                                        \
2179   case X86::Prefix##MOVUPS##Suffix##rm:                                        \
2180   case X86::Prefix##MOVDQA##Suffix##rm:                                        \
2181   case X86::Prefix##MOVDQU##Suffix##rm:
2182 
2183 #define MOV_AVX512_CASE(Suffix)                                                \
2184   case X86::VMOVDQA64##Suffix##rm:                                             \
2185   case X86::VMOVDQA32##Suffix##rm:                                             \
2186   case X86::VMOVDQU64##Suffix##rm:                                             \
2187   case X86::VMOVDQU32##Suffix##rm:                                             \
2188   case X86::VMOVDQU16##Suffix##rm:                                             \
2189   case X86::VMOVDQU8##Suffix##rm:                                              \
2190   case X86::VMOVAPS##Suffix##rm:                                               \
2191   case X86::VMOVAPD##Suffix##rm:                                               \
2192   case X86::VMOVUPS##Suffix##rm:                                               \
2193   case X86::VMOVUPD##Suffix##rm:
2194 
2195 #define CASE_ALL_MOV_RM()                                                      \
2196   MOV_CASE(, )   /* SSE */                                                     \
2197   MOV_CASE(V, )  /* AVX-128 */                                                 \
2198   MOV_CASE(V, Y) /* AVX-256 */                                                 \
2199   MOV_AVX512_CASE(Z)                                                           \
2200   MOV_AVX512_CASE(Z256)                                                        \
2201   MOV_AVX512_CASE(Z128)
2202 
2203     // For loads from a constant pool to a vector register, print the constant
2204     // loaded.
2205     CASE_ALL_MOV_RM()
2206   case X86::VBROADCASTF128:
2207   case X86::VBROADCASTI128:
2208   case X86::VBROADCASTF32X4Z256rm:
2209   case X86::VBROADCASTF32X4rm:
2210   case X86::VBROADCASTF32X8rm:
2211   case X86::VBROADCASTF64X2Z128rm:
2212   case X86::VBROADCASTF64X2rm:
2213   case X86::VBROADCASTF64X4rm:
2214   case X86::VBROADCASTI32X4Z256rm:
2215   case X86::VBROADCASTI32X4rm:
2216   case X86::VBROADCASTI32X8rm:
2217   case X86::VBROADCASTI64X2Z128rm:
2218   case X86::VBROADCASTI64X2rm:
2219   case X86::VBROADCASTI64X4rm:
2220     assert(MI->getNumOperands() >= (1 + X86::AddrNumOperands) &&
2221            "Unexpected number of operands!");
2222     if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
2223       int NumLanes = 1;
2224       // Override NumLanes for the broadcast instructions.
2225       switch (MI->getOpcode()) {
2226       case X86::VBROADCASTF128:        NumLanes = 2; break;
2227       case X86::VBROADCASTI128:        NumLanes = 2; break;
2228       case X86::VBROADCASTF32X4Z256rm: NumLanes = 2; break;
2229       case X86::VBROADCASTF32X4rm:     NumLanes = 4; break;
2230       case X86::VBROADCASTF32X8rm:     NumLanes = 2; break;
2231       case X86::VBROADCASTF64X2Z128rm: NumLanes = 2; break;
2232       case X86::VBROADCASTF64X2rm:     NumLanes = 4; break;
2233       case X86::VBROADCASTF64X4rm:     NumLanes = 2; break;
2234       case X86::VBROADCASTI32X4Z256rm: NumLanes = 2; break;
2235       case X86::VBROADCASTI32X4rm:     NumLanes = 4; break;
2236       case X86::VBROADCASTI32X8rm:     NumLanes = 2; break;
2237       case X86::VBROADCASTI64X2Z128rm: NumLanes = 2; break;
2238       case X86::VBROADCASTI64X2rm:     NumLanes = 4; break;
2239       case X86::VBROADCASTI64X4rm:     NumLanes = 2; break;
2240       }
2241 
2242       std::string Comment;
2243       raw_string_ostream CS(Comment);
2244       const MachineOperand &DstOp = MI->getOperand(0);
2245       CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
2246       if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
2247         CS << "[";
2248         for (int l = 0; l != NumLanes; ++l) {
2249           for (int i = 0, NumElements = CDS->getNumElements(); i < NumElements;
2250                ++i) {
2251             if (i != 0 || l != 0)
2252               CS << ",";
2253             if (CDS->getElementType()->isIntegerTy())
2254               printConstant(CDS->getElementAsAPInt(i), CS);
2255             else if (CDS->getElementType()->isHalfTy() ||
2256                      CDS->getElementType()->isFloatTy() ||
2257                      CDS->getElementType()->isDoubleTy())
2258               printConstant(CDS->getElementAsAPFloat(i), CS);
2259             else
2260               CS << "?";
2261           }
2262         }
2263         CS << "]";
2264         OutStreamer.AddComment(CS.str());
2265       } else if (auto *CV = dyn_cast<ConstantVector>(C)) {
2266         CS << "<";
2267         for (int l = 0; l != NumLanes; ++l) {
2268           for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands;
2269                ++i) {
2270             if (i != 0 || l != 0)
2271               CS << ",";
2272             printConstant(CV->getOperand(i), CS);
2273           }
2274         }
2275         CS << ">";
2276         OutStreamer.AddComment(CS.str());
2277       }
2278     }
2279     break;
2280 
2281   case X86::MOVDDUPrm:
2282   case X86::VMOVDDUPrm:
2283   case X86::VMOVDDUPZ128rm:
2284   case X86::VBROADCASTSSrm:
2285   case X86::VBROADCASTSSYrm:
2286   case X86::VBROADCASTSSZ128rm:
2287   case X86::VBROADCASTSSZ256rm:
2288   case X86::VBROADCASTSSZrm:
2289   case X86::VBROADCASTSDYrm:
2290   case X86::VBROADCASTSDZ256rm:
2291   case X86::VBROADCASTSDZrm:
2292   case X86::VPBROADCASTBrm:
2293   case X86::VPBROADCASTBYrm:
2294   case X86::VPBROADCASTBZ128rm:
2295   case X86::VPBROADCASTBZ256rm:
2296   case X86::VPBROADCASTBZrm:
2297   case X86::VPBROADCASTDrm:
2298   case X86::VPBROADCASTDYrm:
2299   case X86::VPBROADCASTDZ128rm:
2300   case X86::VPBROADCASTDZ256rm:
2301   case X86::VPBROADCASTDZrm:
2302   case X86::VPBROADCASTQrm:
2303   case X86::VPBROADCASTQYrm:
2304   case X86::VPBROADCASTQZ128rm:
2305   case X86::VPBROADCASTQZ256rm:
2306   case X86::VPBROADCASTQZrm:
2307   case X86::VPBROADCASTWrm:
2308   case X86::VPBROADCASTWYrm:
2309   case X86::VPBROADCASTWZ128rm:
2310   case X86::VPBROADCASTWZ256rm:
2311   case X86::VPBROADCASTWZrm:
2312     assert(MI->getNumOperands() >= (1 + X86::AddrNumOperands) &&
2313            "Unexpected number of operands!");
2314     if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
2315       int NumElts;
2316       switch (MI->getOpcode()) {
2317       default: llvm_unreachable("Invalid opcode");
2318       case X86::MOVDDUPrm:          NumElts = 2;  break;
2319       case X86::VMOVDDUPrm:         NumElts = 2;  break;
2320       case X86::VMOVDDUPZ128rm:     NumElts = 2;  break;
2321       case X86::VBROADCASTSSrm:     NumElts = 4;  break;
2322       case X86::VBROADCASTSSYrm:    NumElts = 8;  break;
2323       case X86::VBROADCASTSSZ128rm: NumElts = 4;  break;
2324       case X86::VBROADCASTSSZ256rm: NumElts = 8;  break;
2325       case X86::VBROADCASTSSZrm:    NumElts = 16; break;
2326       case X86::VBROADCASTSDYrm:    NumElts = 4;  break;
2327       case X86::VBROADCASTSDZ256rm: NumElts = 4;  break;
2328       case X86::VBROADCASTSDZrm:    NumElts = 8;  break;
2329       case X86::VPBROADCASTBrm:     NumElts = 16; break;
2330       case X86::VPBROADCASTBYrm:    NumElts = 32; break;
2331       case X86::VPBROADCASTBZ128rm: NumElts = 16; break;
2332       case X86::VPBROADCASTBZ256rm: NumElts = 32; break;
2333       case X86::VPBROADCASTBZrm:    NumElts = 64; break;
2334       case X86::VPBROADCASTDrm:     NumElts = 4;  break;
2335       case X86::VPBROADCASTDYrm:    NumElts = 8;  break;
2336       case X86::VPBROADCASTDZ128rm: NumElts = 4;  break;
2337       case X86::VPBROADCASTDZ256rm: NumElts = 8;  break;
2338       case X86::VPBROADCASTDZrm:    NumElts = 16; break;
2339       case X86::VPBROADCASTQrm:     NumElts = 2;  break;
2340       case X86::VPBROADCASTQYrm:    NumElts = 4;  break;
2341       case X86::VPBROADCASTQZ128rm: NumElts = 2;  break;
2342       case X86::VPBROADCASTQZ256rm: NumElts = 4;  break;
2343       case X86::VPBROADCASTQZrm:    NumElts = 8;  break;
2344       case X86::VPBROADCASTWrm:     NumElts = 8;  break;
2345       case X86::VPBROADCASTWYrm:    NumElts = 16; break;
2346       case X86::VPBROADCASTWZ128rm: NumElts = 8;  break;
2347       case X86::VPBROADCASTWZ256rm: NumElts = 16; break;
2348       case X86::VPBROADCASTWZrm:    NumElts = 32; break;
2349       }
2350 
2351       std::string Comment;
2352       raw_string_ostream CS(Comment);
2353       const MachineOperand &DstOp = MI->getOperand(0);
2354       CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
2355       CS << "[";
2356       for (int i = 0; i != NumElts; ++i) {
2357         if (i != 0)
2358           CS << ",";
2359         printConstant(C, CS);
2360       }
2361       CS << "]";
2362       OutStreamer.AddComment(CS.str());
2363     }
2364   }
2365 }
2366 
emitInstruction(const MachineInstr * MI)2367 void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
2368   X86MCInstLower MCInstLowering(*MF, *this);
2369   const X86RegisterInfo *RI =
2370       MF->getSubtarget<X86Subtarget>().getRegisterInfo();
2371 
2372   // Add a comment about EVEX-2-VEX compression for AVX-512 instrs that
2373   // are compressed from EVEX encoding to VEX encoding.
2374   if (TM.Options.MCOptions.ShowMCEncoding) {
2375     if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX)
2376       OutStreamer->AddComment("EVEX TO VEX Compression ", false);
2377   }
2378 
2379   // Add comments for values loaded from constant pool.
2380   if (OutStreamer->isVerboseAsm())
2381     addConstantComments(MI, *OutStreamer);
2382 
2383   switch (MI->getOpcode()) {
2384   case TargetOpcode::DBG_VALUE:
2385     llvm_unreachable("Should be handled target independently");
2386 
2387   // Emit nothing here but a comment if we can.
2388   case X86::Int_MemBarrier:
2389     OutStreamer->emitRawComment("MEMBARRIER");
2390     return;
2391 
2392   case X86::EH_RETURN:
2393   case X86::EH_RETURN64: {
2394     // Lower these as normal, but add some comments.
2395     Register Reg = MI->getOperand(0).getReg();
2396     OutStreamer->AddComment(StringRef("eh_return, addr: %") +
2397                             X86ATTInstPrinter::getRegisterName(Reg));
2398     break;
2399   }
2400   case X86::CLEANUPRET: {
2401     // Lower these as normal, but add some comments.
2402     OutStreamer->AddComment("CLEANUPRET");
2403     break;
2404   }
2405 
2406   case X86::CATCHRET: {
2407     // Lower these as normal, but add some comments.
2408     OutStreamer->AddComment("CATCHRET");
2409     break;
2410   }
2411 
2412   case X86::ENDBR32:
2413   case X86::ENDBR64: {
2414     // CurrentPatchableFunctionEntrySym can be CurrentFnBegin only for
2415     // -fpatchable-function-entry=N,0. The entry MBB is guaranteed to be
2416     // non-empty. If MI is the initial ENDBR, place the
2417     // __patchable_function_entries label after ENDBR.
2418     if (CurrentPatchableFunctionEntrySym &&
2419         CurrentPatchableFunctionEntrySym == CurrentFnBegin &&
2420         MI == &MF->front().front()) {
2421       MCInst Inst;
2422       MCInstLowering.Lower(MI, Inst);
2423       EmitAndCountInstruction(Inst);
2424       CurrentPatchableFunctionEntrySym = createTempSymbol("patch");
2425       OutStreamer->emitLabel(CurrentPatchableFunctionEntrySym);
2426       return;
2427     }
2428     break;
2429   }
2430 
2431   case X86::TAILJMPr:
2432   case X86::TAILJMPm:
2433   case X86::TAILJMPd:
2434   case X86::TAILJMPd_CC:
2435   case X86::TAILJMPr64:
2436   case X86::TAILJMPm64:
2437   case X86::TAILJMPd64:
2438   case X86::TAILJMPd64_CC:
2439   case X86::TAILJMPr64_REX:
2440   case X86::TAILJMPm64_REX:
2441     // Lower these as normal, but add some comments.
2442     OutStreamer->AddComment("TAILCALL");
2443     break;
2444 
2445   case X86::TLS_addr32:
2446   case X86::TLS_addr64:
2447   case X86::TLS_base_addr32:
2448   case X86::TLS_base_addr64:
2449     return LowerTlsAddr(MCInstLowering, *MI);
2450 
2451   case X86::MOVPC32r: {
2452     // This is a pseudo op for a two instruction sequence with a label, which
2453     // looks like:
2454     //     call "L1$pb"
2455     // "L1$pb":
2456     //     popl %esi
2457 
2458     // Emit the call.
2459     MCSymbol *PICBase = MF->getPICBaseSymbol();
2460     // FIXME: We would like an efficient form for this, so we don't have to do a
2461     // lot of extra uniquing.
2462     EmitAndCountInstruction(
2463         MCInstBuilder(X86::CALLpcrel32)
2464             .addExpr(MCSymbolRefExpr::create(PICBase, OutContext)));
2465 
2466     const X86FrameLowering *FrameLowering =
2467         MF->getSubtarget<X86Subtarget>().getFrameLowering();
2468     bool hasFP = FrameLowering->hasFP(*MF);
2469 
2470     // TODO: This is needed only if we require precise CFA.
2471     bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() &&
2472                                !OutStreamer->getDwarfFrameInfos().back().End;
2473 
2474     int stackGrowth = -RI->getSlotSize();
2475 
2476     if (HasActiveDwarfFrame && !hasFP) {
2477       OutStreamer->emitCFIAdjustCfaOffset(-stackGrowth);
2478     }
2479 
2480     // Emit the label.
2481     OutStreamer->emitLabel(PICBase);
2482 
2483     // popl $reg
2484     EmitAndCountInstruction(
2485         MCInstBuilder(X86::POP32r).addReg(MI->getOperand(0).getReg()));
2486 
2487     if (HasActiveDwarfFrame && !hasFP) {
2488       OutStreamer->emitCFIAdjustCfaOffset(stackGrowth);
2489     }
2490     return;
2491   }
2492 
2493   case X86::ADD32ri: {
2494     // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri.
2495     if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS)
2496       break;
2497 
2498     // Okay, we have something like:
2499     //  EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL)
2500 
2501     // For this, we want to print something like:
2502     //   MYGLOBAL + (. - PICBASE)
2503     // However, we can't generate a ".", so just emit a new label here and refer
2504     // to it.
2505     MCSymbol *DotSym = OutContext.createTempSymbol();
2506     OutStreamer->emitLabel(DotSym);
2507 
2508     // Now that we have emitted the label, lower the complex operand expression.
2509     MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2));
2510 
2511     const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext);
2512     const MCExpr *PICBase =
2513         MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext);
2514     DotExpr = MCBinaryExpr::createSub(DotExpr, PICBase, OutContext);
2515 
2516     DotExpr = MCBinaryExpr::createAdd(
2517         MCSymbolRefExpr::create(OpSym, OutContext), DotExpr, OutContext);
2518 
2519     EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri)
2520                                 .addReg(MI->getOperand(0).getReg())
2521                                 .addReg(MI->getOperand(1).getReg())
2522                                 .addExpr(DotExpr));
2523     return;
2524   }
2525   case TargetOpcode::STATEPOINT:
2526     return LowerSTATEPOINT(*MI, MCInstLowering);
2527 
2528   case TargetOpcode::FAULTING_OP:
2529     return LowerFAULTING_OP(*MI, MCInstLowering);
2530 
2531   case TargetOpcode::FENTRY_CALL:
2532     return LowerFENTRY_CALL(*MI, MCInstLowering);
2533 
2534   case TargetOpcode::PATCHABLE_OP:
2535     return LowerPATCHABLE_OP(*MI, MCInstLowering);
2536 
2537   case TargetOpcode::STACKMAP:
2538     return LowerSTACKMAP(*MI);
2539 
2540   case TargetOpcode::PATCHPOINT:
2541     return LowerPATCHPOINT(*MI, MCInstLowering);
2542 
2543   case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
2544     return LowerPATCHABLE_FUNCTION_ENTER(*MI, MCInstLowering);
2545 
2546   case TargetOpcode::PATCHABLE_RET:
2547     return LowerPATCHABLE_RET(*MI, MCInstLowering);
2548 
2549   case TargetOpcode::PATCHABLE_TAIL_CALL:
2550     return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering);
2551 
2552   case TargetOpcode::PATCHABLE_EVENT_CALL:
2553     return LowerPATCHABLE_EVENT_CALL(*MI, MCInstLowering);
2554 
2555   case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
2556     return LowerPATCHABLE_TYPED_EVENT_CALL(*MI, MCInstLowering);
2557 
2558   case X86::MORESTACK_RET:
2559     EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
2560     return;
2561 
2562   case X86::MORESTACK_RET_RESTORE_R10:
2563     // Return, then restore R10.
2564     EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
2565     EmitAndCountInstruction(
2566         MCInstBuilder(X86::MOV64rr).addReg(X86::R10).addReg(X86::RAX));
2567     return;
2568 
2569   case X86::SEH_PushReg:
2570   case X86::SEH_SaveReg:
2571   case X86::SEH_SaveXMM:
2572   case X86::SEH_StackAlloc:
2573   case X86::SEH_StackAlign:
2574   case X86::SEH_SetFrame:
2575   case X86::SEH_PushFrame:
2576   case X86::SEH_EndPrologue:
2577     EmitSEHInstruction(MI);
2578     return;
2579 
2580   case X86::SEH_Epilogue: {
2581     assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
2582     MachineBasicBlock::const_iterator MBBI(MI);
2583     // Check if preceded by a call and emit nop if so.
2584     for (MBBI = PrevCrossBBInst(MBBI);
2585          MBBI != MachineBasicBlock::const_iterator();
2586          MBBI = PrevCrossBBInst(MBBI)) {
2587       // Conservatively assume that pseudo instructions don't emit code and keep
2588       // looking for a call. We may emit an unnecessary nop in some cases.
2589       if (!MBBI->isPseudo()) {
2590         if (MBBI->isCall())
2591           EmitAndCountInstruction(MCInstBuilder(X86::NOOP));
2592         break;
2593       }
2594     }
2595     return;
2596   }
2597   }
2598 
2599   MCInst TmpInst;
2600   MCInstLowering.Lower(MI, TmpInst);
2601 
2602   // Stackmap shadows cannot include branch targets, so we can count the bytes
2603   // in a call towards the shadow, but must ensure that the no thread returns
2604   // in to the stackmap shadow.  The only way to achieve this is if the call
2605   // is at the end of the shadow.
2606   if (MI->isCall()) {
2607     // Count then size of the call towards the shadow
2608     SMShadowTracker.count(TmpInst, getSubtargetInfo(), CodeEmitter.get());
2609     // Then flush the shadow so that we fill with nops before the call, not
2610     // after it.
2611     SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
2612     // Then emit the call
2613     OutStreamer->emitInstruction(TmpInst, getSubtargetInfo());
2614     return;
2615   }
2616 
2617   EmitAndCountInstruction(TmpInst);
2618 }
2619