1 //===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains code to lower X86 MachineInstrs to their corresponding
10 // MCInst records.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "MCTargetDesc/X86ATTInstPrinter.h"
15 #include "MCTargetDesc/X86BaseInfo.h"
16 #include "MCTargetDesc/X86InstComments.h"
17 #include "MCTargetDesc/X86ShuffleDecode.h"
18 #include "MCTargetDesc/X86TargetStreamer.h"
19 #include "X86AsmPrinter.h"
20 #include "X86RegisterInfo.h"
21 #include "X86ShuffleDecodeConstantPool.h"
22 #include "X86Subtarget.h"
23 #include "llvm/ADT/SmallString.h"
24 #include "llvm/ADT/iterator_range.h"
25 #include "llvm/CodeGen/MachineConstantPool.h"
26 #include "llvm/CodeGen/MachineFunction.h"
27 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
28 #include "llvm/CodeGen/MachineOperand.h"
29 #include "llvm/CodeGen/StackMaps.h"
30 #include "llvm/IR/DataLayout.h"
31 #include "llvm/IR/GlobalValue.h"
32 #include "llvm/IR/Mangler.h"
33 #include "llvm/MC/MCAsmInfo.h"
34 #include "llvm/MC/MCCodeEmitter.h"
35 #include "llvm/MC/MCContext.h"
36 #include "llvm/MC/MCExpr.h"
37 #include "llvm/MC/MCFixup.h"
38 #include "llvm/MC/MCInst.h"
39 #include "llvm/MC/MCInstBuilder.h"
40 #include "llvm/MC/MCSection.h"
41 #include "llvm/MC/MCSectionELF.h"
42 #include "llvm/MC/MCStreamer.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/MC/MCSymbolELF.h"
45 #include "llvm/MC/TargetRegistry.h"
46 #include "llvm/Target/TargetLoweringObjectFile.h"
47 #include "llvm/Target/TargetMachine.h"
48 #include "llvm/Transforms/Instrumentation/AddressSanitizer.h"
49 #include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
50 #include <string>
51 
52 using namespace llvm;
53 
54 namespace {
55 
56 /// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst.
57 class X86MCInstLower {
58   MCContext &Ctx;
59   const MachineFunction &MF;
60   const TargetMachine &TM;
61   const MCAsmInfo &MAI;
62   X86AsmPrinter &AsmPrinter;
63 
64 public:
65   X86MCInstLower(const MachineFunction &MF, X86AsmPrinter &asmprinter);
66 
67   std::optional<MCOperand> LowerMachineOperand(const MachineInstr *MI,
68                                                const MachineOperand &MO) const;
69   void Lower(const MachineInstr *MI, MCInst &OutMI) const;
70 
71   MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const;
72   MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
73 
74 private:
75   MachineModuleInfoMachO &getMachOMMI() const;
76 };
77 
78 } // end anonymous namespace
79 
80 /// A RAII helper which defines a region of instructions which can't have
81 /// padding added between them for correctness.
82 struct NoAutoPaddingScope {
83   MCStreamer &OS;
84   const bool OldAllowAutoPadding;
85   NoAutoPaddingScope(MCStreamer &OS)
86       : OS(OS), OldAllowAutoPadding(OS.getAllowAutoPadding()) {
87     changeAndComment(false);
88   }
89   ~NoAutoPaddingScope() { changeAndComment(OldAllowAutoPadding); }
90   void changeAndComment(bool b) {
91     if (b == OS.getAllowAutoPadding())
92       return;
93     OS.setAllowAutoPadding(b);
94     if (b)
95       OS.emitRawComment("autopadding");
96     else
97       OS.emitRawComment("noautopadding");
98   }
99 };
100 
// Emit a minimal sequence of nops spanning NumBytes bytes.
// Declared ahead of its definition so that
// StackMapShadowTracker::emitShadowPadding below can call it.
static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
                        const X86Subtarget *Subtarget);
104 
105 void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst,
106                                                  const MCSubtargetInfo &STI,
107                                                  MCCodeEmitter *CodeEmitter) {
108   if (InShadow) {
109     SmallString<256> Code;
110     SmallVector<MCFixup, 4> Fixups;
111     raw_svector_ostream VecOS(Code);
112     CodeEmitter->encodeInstruction(Inst, VecOS, Fixups, STI);
113     CurrentShadowSize += Code.size();
114     if (CurrentShadowSize >= RequiredShadowSize)
115       InShadow = false; // The shadow is big enough. Stop counting.
116   }
117 }
118 
119 void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding(
120     MCStreamer &OutStreamer, const MCSubtargetInfo &STI) {
121   if (InShadow && CurrentShadowSize < RequiredShadowSize) {
122     InShadow = false;
123     emitX86Nops(OutStreamer, RequiredShadowSize - CurrentShadowSize,
124                 &MF->getSubtarget<X86Subtarget>());
125   }
126 }
127 
128 void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) {
129   OutStreamer->emitInstruction(Inst, getSubtargetInfo());
130   SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get());
131 }
132 
133 X86MCInstLower::X86MCInstLower(const MachineFunction &mf,
134                                X86AsmPrinter &asmprinter)
135     : Ctx(mf.getContext()), MF(mf), TM(mf.getTarget()), MAI(*TM.getMCAsmInfo()),
136       AsmPrinter(asmprinter) {}
137 
138 MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
139   return MF.getMMI().getObjFileInfo<MachineModuleInfoMachO>();
140 }
141 
142 /// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol
143 /// operand to an MCSymbol.
144 MCSymbol *X86MCInstLower::GetSymbolFromOperand(const MachineOperand &MO) const {
145   const Triple &TT = TM.getTargetTriple();
146   if (MO.isGlobal() && TT.isOSBinFormatELF())
147     return AsmPrinter.getSymbolPreferLocal(*MO.getGlobal());
148 
149   const DataLayout &DL = MF.getDataLayout();
150   assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) &&
151          "Isn't a symbol reference");
152 
153   MCSymbol *Sym = nullptr;
154   SmallString<128> Name;
155   StringRef Suffix;
156 
157   switch (MO.getTargetFlags()) {
158   case X86II::MO_DLLIMPORT:
159     // Handle dllimport linkage.
160     Name += "__imp_";
161     break;
162   case X86II::MO_COFFSTUB:
163     Name += ".refptr.";
164     break;
165   case X86II::MO_DARWIN_NONLAZY:
166   case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
167     Suffix = "$non_lazy_ptr";
168     break;
169   }
170 
171   if (!Suffix.empty())
172     Name += DL.getPrivateGlobalPrefix();
173 
174   if (MO.isGlobal()) {
175     const GlobalValue *GV = MO.getGlobal();
176     AsmPrinter.getNameWithPrefix(Name, GV);
177   } else if (MO.isSymbol()) {
178     Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL);
179   } else if (MO.isMBB()) {
180     assert(Suffix.empty());
181     Sym = MO.getMBB()->getSymbol();
182   }
183 
184   Name += Suffix;
185   if (!Sym)
186     Sym = Ctx.getOrCreateSymbol(Name);
187 
188   // If the target flags on the operand changes the name of the symbol, do that
189   // before we return the symbol.
190   switch (MO.getTargetFlags()) {
191   default:
192     break;
193   case X86II::MO_COFFSTUB: {
194     MachineModuleInfoCOFF &MMICOFF =
195         MF.getMMI().getObjFileInfo<MachineModuleInfoCOFF>();
196     MachineModuleInfoImpl::StubValueTy &StubSym = MMICOFF.getGVStubEntry(Sym);
197     if (!StubSym.getPointer()) {
198       assert(MO.isGlobal() && "Extern symbol not handled yet");
199       StubSym = MachineModuleInfoImpl::StubValueTy(
200           AsmPrinter.getSymbol(MO.getGlobal()), true);
201     }
202     break;
203   }
204   case X86II::MO_DARWIN_NONLAZY:
205   case X86II::MO_DARWIN_NONLAZY_PIC_BASE: {
206     MachineModuleInfoImpl::StubValueTy &StubSym =
207         getMachOMMI().getGVStubEntry(Sym);
208     if (!StubSym.getPointer()) {
209       assert(MO.isGlobal() && "Extern symbol not handled yet");
210       StubSym = MachineModuleInfoImpl::StubValueTy(
211           AsmPrinter.getSymbol(MO.getGlobal()),
212           !MO.getGlobal()->hasInternalLinkage());
213     }
214     break;
215   }
216   }
217 
218   return Sym;
219 }
220 
221 MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
222                                              MCSymbol *Sym) const {
223   // FIXME: We would like an efficient form for this, so we don't have to do a
224   // lot of extra uniquing.
225   const MCExpr *Expr = nullptr;
226   MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
227 
228   switch (MO.getTargetFlags()) {
229   default:
230     llvm_unreachable("Unknown target flag on GV operand");
231   case X86II::MO_NO_FLAG: // No flag.
232   // These affect the name of the symbol, not any suffix.
233   case X86II::MO_DARWIN_NONLAZY:
234   case X86II::MO_DLLIMPORT:
235   case X86II::MO_COFFSTUB:
236     break;
237 
238   case X86II::MO_TLVP:
239     RefKind = MCSymbolRefExpr::VK_TLVP;
240     break;
241   case X86II::MO_TLVP_PIC_BASE:
242     Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
243     // Subtract the pic base.
244     Expr = MCBinaryExpr::createSub(
245         Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
246     break;
247   case X86II::MO_SECREL:
248     RefKind = MCSymbolRefExpr::VK_SECREL;
249     break;
250   case X86II::MO_TLSGD:
251     RefKind = MCSymbolRefExpr::VK_TLSGD;
252     break;
253   case X86II::MO_TLSLD:
254     RefKind = MCSymbolRefExpr::VK_TLSLD;
255     break;
256   case X86II::MO_TLSLDM:
257     RefKind = MCSymbolRefExpr::VK_TLSLDM;
258     break;
259   case X86II::MO_GOTTPOFF:
260     RefKind = MCSymbolRefExpr::VK_GOTTPOFF;
261     break;
262   case X86II::MO_INDNTPOFF:
263     RefKind = MCSymbolRefExpr::VK_INDNTPOFF;
264     break;
265   case X86II::MO_TPOFF:
266     RefKind = MCSymbolRefExpr::VK_TPOFF;
267     break;
268   case X86II::MO_DTPOFF:
269     RefKind = MCSymbolRefExpr::VK_DTPOFF;
270     break;
271   case X86II::MO_NTPOFF:
272     RefKind = MCSymbolRefExpr::VK_NTPOFF;
273     break;
274   case X86II::MO_GOTNTPOFF:
275     RefKind = MCSymbolRefExpr::VK_GOTNTPOFF;
276     break;
277   case X86II::MO_GOTPCREL:
278     RefKind = MCSymbolRefExpr::VK_GOTPCREL;
279     break;
280   case X86II::MO_GOTPCREL_NORELAX:
281     RefKind = MCSymbolRefExpr::VK_GOTPCREL_NORELAX;
282     break;
283   case X86II::MO_GOT:
284     RefKind = MCSymbolRefExpr::VK_GOT;
285     break;
286   case X86II::MO_GOTOFF:
287     RefKind = MCSymbolRefExpr::VK_GOTOFF;
288     break;
289   case X86II::MO_PLT:
290     RefKind = MCSymbolRefExpr::VK_PLT;
291     break;
292   case X86II::MO_ABS8:
293     RefKind = MCSymbolRefExpr::VK_X86_ABS8;
294     break;
295   case X86II::MO_PIC_BASE_OFFSET:
296   case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
297     Expr = MCSymbolRefExpr::create(Sym, Ctx);
298     // Subtract the pic base.
299     Expr = MCBinaryExpr::createSub(
300         Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
301     if (MO.isJTI()) {
302       assert(MAI.doesSetDirectiveSuppressReloc());
303       // If .set directive is supported, use it to reduce the number of
304       // relocations the assembler will generate for differences between
305       // local labels. This is only safe when the symbols are in the same
306       // section so we are restricting it to jumptable references.
307       MCSymbol *Label = Ctx.createTempSymbol();
308       AsmPrinter.OutStreamer->emitAssignment(Label, Expr);
309       Expr = MCSymbolRefExpr::create(Label, Ctx);
310     }
311     break;
312   }
313 
314   if (!Expr)
315     Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx);
316 
317   if (!MO.isJTI() && !MO.isMBB() && MO.getOffset())
318     Expr = MCBinaryExpr::createAdd(
319         Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
320   return MCOperand::createExpr(Expr);
321 }
322 
323 /// Simplify FOO $imm, %{al,ax,eax,rax} to FOO $imm, for instruction with
324 /// a short fixed-register form.
325 static void SimplifyShortImmForm(MCInst &Inst, unsigned Opcode) {
326   unsigned ImmOp = Inst.getNumOperands() - 1;
327   assert(Inst.getOperand(0).isReg() &&
328          (Inst.getOperand(ImmOp).isImm() || Inst.getOperand(ImmOp).isExpr()) &&
329          ((Inst.getNumOperands() == 3 && Inst.getOperand(1).isReg() &&
330            Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) ||
331           Inst.getNumOperands() == 2) &&
332          "Unexpected instruction!");
333 
334   // Check whether the destination register can be fixed.
335   unsigned Reg = Inst.getOperand(0).getReg();
336   if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
337     return;
338 
339   // If so, rewrite the instruction.
340   MCOperand Saved = Inst.getOperand(ImmOp);
341   Inst = MCInst();
342   Inst.setOpcode(Opcode);
343   Inst.addOperand(Saved);
344 }
345 
346 /// If a movsx instruction has a shorter encoding for the used register
347 /// simplify the instruction to use it instead.
348 static void SimplifyMOVSX(MCInst &Inst) {
349   unsigned NewOpcode = 0;
350   unsigned Op0 = Inst.getOperand(0).getReg(), Op1 = Inst.getOperand(1).getReg();
351   switch (Inst.getOpcode()) {
352   default:
353     llvm_unreachable("Unexpected instruction!");
354   case X86::MOVSX16rr8: // movsbw %al, %ax   --> cbtw
355     if (Op0 == X86::AX && Op1 == X86::AL)
356       NewOpcode = X86::CBW;
357     break;
358   case X86::MOVSX32rr16: // movswl %ax, %eax  --> cwtl
359     if (Op0 == X86::EAX && Op1 == X86::AX)
360       NewOpcode = X86::CWDE;
361     break;
362   case X86::MOVSX64rr32: // movslq %eax, %rax --> cltq
363     if (Op0 == X86::RAX && Op1 == X86::EAX)
364       NewOpcode = X86::CDQE;
365     break;
366   }
367 
368   if (NewOpcode != 0) {
369     Inst = MCInst();
370     Inst.setOpcode(NewOpcode);
371   }
372 }
373 
374 /// Simplify things like MOV32rm to MOV32o32a.
375 static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst,
376                                   unsigned Opcode) {
377   // Don't make these simplifications in 64-bit mode; other assemblers don't
378   // perform them because they make the code larger.
379   if (Printer.getSubtarget().is64Bit())
380     return;
381 
382   bool IsStore = Inst.getOperand(0).isReg() && Inst.getOperand(1).isReg();
383   unsigned AddrBase = IsStore;
384   unsigned RegOp = IsStore ? 0 : 5;
385   unsigned AddrOp = AddrBase + 3;
386   assert(
387       Inst.getNumOperands() == 6 && Inst.getOperand(RegOp).isReg() &&
388       Inst.getOperand(AddrBase + X86::AddrBaseReg).isReg() &&
389       Inst.getOperand(AddrBase + X86::AddrScaleAmt).isImm() &&
390       Inst.getOperand(AddrBase + X86::AddrIndexReg).isReg() &&
391       Inst.getOperand(AddrBase + X86::AddrSegmentReg).isReg() &&
392       (Inst.getOperand(AddrOp).isExpr() || Inst.getOperand(AddrOp).isImm()) &&
393       "Unexpected instruction!");
394 
395   // Check whether the destination register can be fixed.
396   unsigned Reg = Inst.getOperand(RegOp).getReg();
397   if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
398     return;
399 
400   // Check whether this is an absolute address.
401   // FIXME: We know TLVP symbol refs aren't, but there should be a better way
402   // to do this here.
403   bool Absolute = true;
404   if (Inst.getOperand(AddrOp).isExpr()) {
405     const MCExpr *MCE = Inst.getOperand(AddrOp).getExpr();
406     if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(MCE))
407       if (SRE->getKind() == MCSymbolRefExpr::VK_TLVP)
408         Absolute = false;
409   }
410 
411   if (Absolute &&
412       (Inst.getOperand(AddrBase + X86::AddrBaseReg).getReg() != 0 ||
413        Inst.getOperand(AddrBase + X86::AddrScaleAmt).getImm() != 1 ||
414        Inst.getOperand(AddrBase + X86::AddrIndexReg).getReg() != 0))
415     return;
416 
417   // If so, rewrite the instruction.
418   MCOperand Saved = Inst.getOperand(AddrOp);
419   MCOperand Seg = Inst.getOperand(AddrBase + X86::AddrSegmentReg);
420   Inst = MCInst();
421   Inst.setOpcode(Opcode);
422   Inst.addOperand(Saved);
423   Inst.addOperand(Seg);
424 }
425 
426 static unsigned getRetOpcode(const X86Subtarget &Subtarget) {
427   return Subtarget.is64Bit() ? X86::RET64 : X86::RET32;
428 }
429 
430 std::optional<MCOperand>
431 X86MCInstLower::LowerMachineOperand(const MachineInstr *MI,
432                                     const MachineOperand &MO) const {
433   switch (MO.getType()) {
434   default:
435     MI->print(errs());
436     llvm_unreachable("unknown operand type");
437   case MachineOperand::MO_Register:
438     // Ignore all implicit register operands.
439     if (MO.isImplicit())
440       return std::nullopt;
441     return MCOperand::createReg(MO.getReg());
442   case MachineOperand::MO_Immediate:
443     return MCOperand::createImm(MO.getImm());
444   case MachineOperand::MO_MachineBasicBlock:
445   case MachineOperand::MO_GlobalAddress:
446   case MachineOperand::MO_ExternalSymbol:
447     return LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
448   case MachineOperand::MO_MCSymbol:
449     return LowerSymbolOperand(MO, MO.getMCSymbol());
450   case MachineOperand::MO_JumpTableIndex:
451     return LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()));
452   case MachineOperand::MO_ConstantPoolIndex:
453     return LowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex()));
454   case MachineOperand::MO_BlockAddress:
455     return LowerSymbolOperand(
456         MO, AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()));
457   case MachineOperand::MO_RegisterMask:
458     // Ignore call clobbers.
459     return std::nullopt;
460   }
461 }
462 
463 // Replace TAILJMP opcodes with their equivalent opcodes that have encoding
464 // information.
465 static unsigned convertTailJumpOpcode(unsigned Opcode) {
466   switch (Opcode) {
467   case X86::TAILJMPr:
468     Opcode = X86::JMP32r;
469     break;
470   case X86::TAILJMPm:
471     Opcode = X86::JMP32m;
472     break;
473   case X86::TAILJMPr64:
474     Opcode = X86::JMP64r;
475     break;
476   case X86::TAILJMPm64:
477     Opcode = X86::JMP64m;
478     break;
479   case X86::TAILJMPr64_REX:
480     Opcode = X86::JMP64r_REX;
481     break;
482   case X86::TAILJMPm64_REX:
483     Opcode = X86::JMP64m_REX;
484     break;
485   case X86::TAILJMPd:
486   case X86::TAILJMPd64:
487     Opcode = X86::JMP_1;
488     break;
489   case X86::TAILJMPd_CC:
490   case X86::TAILJMPd64_CC:
491     Opcode = X86::JCC_1;
492     break;
493   }
494 
495   return Opcode;
496 }
497 
498 void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
499   OutMI.setOpcode(MI->getOpcode());
500 
501   for (const MachineOperand &MO : MI->operands())
502     if (auto MaybeMCOp = LowerMachineOperand(MI, MO))
503       OutMI.addOperand(*MaybeMCOp);
504 
505   // Handle a few special cases to eliminate operand modifiers.
506   switch (OutMI.getOpcode()) {
507   case X86::LEA64_32r:
508   case X86::LEA64r:
509   case X86::LEA16r:
510   case X86::LEA32r:
511     // LEA should have a segment register, but it must be empty.
512     assert(OutMI.getNumOperands() == 1 + X86::AddrNumOperands &&
513            "Unexpected # of LEA operands");
514     assert(OutMI.getOperand(1 + X86::AddrSegmentReg).getReg() == 0 &&
515            "LEA has segment specified!");
516     break;
517 
518   case X86::MULX32Hrr:
519   case X86::MULX32Hrm:
520   case X86::MULX64Hrr:
521   case X86::MULX64Hrm: {
522     // Turn into regular MULX by duplicating the destination.
523     unsigned NewOpc;
524     switch (OutMI.getOpcode()) {
525     default: llvm_unreachable("Invalid opcode");
526     case X86::MULX32Hrr: NewOpc = X86::MULX32rr; break;
527     case X86::MULX32Hrm: NewOpc = X86::MULX32rm; break;
528     case X86::MULX64Hrr: NewOpc = X86::MULX64rr; break;
529     case X86::MULX64Hrm: NewOpc = X86::MULX64rm; break;
530     }
531     OutMI.setOpcode(NewOpc);
532     // Duplicate the destination.
533     unsigned DestReg = OutMI.getOperand(0).getReg();
534     OutMI.insert(OutMI.begin(), MCOperand::createReg(DestReg));
535     break;
536   }
537 
538   // Commute operands to get a smaller encoding by using VEX.R instead of VEX.B
539   // if one of the registers is extended, but other isn't.
540   case X86::VMOVZPQILo2PQIrr:
541   case X86::VMOVAPDrr:
542   case X86::VMOVAPDYrr:
543   case X86::VMOVAPSrr:
544   case X86::VMOVAPSYrr:
545   case X86::VMOVDQArr:
546   case X86::VMOVDQAYrr:
547   case X86::VMOVDQUrr:
548   case X86::VMOVDQUYrr:
549   case X86::VMOVUPDrr:
550   case X86::VMOVUPDYrr:
551   case X86::VMOVUPSrr:
552   case X86::VMOVUPSYrr: {
553     if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
554         X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg())) {
555       unsigned NewOpc;
556       switch (OutMI.getOpcode()) {
557       default: llvm_unreachable("Invalid opcode");
558       case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr;   break;
559       case X86::VMOVAPDrr:        NewOpc = X86::VMOVAPDrr_REV;  break;
560       case X86::VMOVAPDYrr:       NewOpc = X86::VMOVAPDYrr_REV; break;
561       case X86::VMOVAPSrr:        NewOpc = X86::VMOVAPSrr_REV;  break;
562       case X86::VMOVAPSYrr:       NewOpc = X86::VMOVAPSYrr_REV; break;
563       case X86::VMOVDQArr:        NewOpc = X86::VMOVDQArr_REV;  break;
564       case X86::VMOVDQAYrr:       NewOpc = X86::VMOVDQAYrr_REV; break;
565       case X86::VMOVDQUrr:        NewOpc = X86::VMOVDQUrr_REV;  break;
566       case X86::VMOVDQUYrr:       NewOpc = X86::VMOVDQUYrr_REV; break;
567       case X86::VMOVUPDrr:        NewOpc = X86::VMOVUPDrr_REV;  break;
568       case X86::VMOVUPDYrr:       NewOpc = X86::VMOVUPDYrr_REV; break;
569       case X86::VMOVUPSrr:        NewOpc = X86::VMOVUPSrr_REV;  break;
570       case X86::VMOVUPSYrr:       NewOpc = X86::VMOVUPSYrr_REV; break;
571       }
572       OutMI.setOpcode(NewOpc);
573     }
574     break;
575   }
576   case X86::VMOVSDrr:
577   case X86::VMOVSSrr: {
578     if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
579         X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) {
580       unsigned NewOpc;
581       switch (OutMI.getOpcode()) {
582       default: llvm_unreachable("Invalid opcode");
583       case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
584       case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
585       }
586       OutMI.setOpcode(NewOpc);
587     }
588     break;
589   }
590 
591   case X86::VPCMPBZ128rmi:  case X86::VPCMPBZ128rmik:
592   case X86::VPCMPBZ128rri:  case X86::VPCMPBZ128rrik:
593   case X86::VPCMPBZ256rmi:  case X86::VPCMPBZ256rmik:
594   case X86::VPCMPBZ256rri:  case X86::VPCMPBZ256rrik:
595   case X86::VPCMPBZrmi:     case X86::VPCMPBZrmik:
596   case X86::VPCMPBZrri:     case X86::VPCMPBZrrik:
597   case X86::VPCMPDZ128rmi:  case X86::VPCMPDZ128rmik:
598   case X86::VPCMPDZ128rmib: case X86::VPCMPDZ128rmibk:
599   case X86::VPCMPDZ128rri:  case X86::VPCMPDZ128rrik:
600   case X86::VPCMPDZ256rmi:  case X86::VPCMPDZ256rmik:
601   case X86::VPCMPDZ256rmib: case X86::VPCMPDZ256rmibk:
602   case X86::VPCMPDZ256rri:  case X86::VPCMPDZ256rrik:
603   case X86::VPCMPDZrmi:     case X86::VPCMPDZrmik:
604   case X86::VPCMPDZrmib:    case X86::VPCMPDZrmibk:
605   case X86::VPCMPDZrri:     case X86::VPCMPDZrrik:
606   case X86::VPCMPQZ128rmi:  case X86::VPCMPQZ128rmik:
607   case X86::VPCMPQZ128rmib: case X86::VPCMPQZ128rmibk:
608   case X86::VPCMPQZ128rri:  case X86::VPCMPQZ128rrik:
609   case X86::VPCMPQZ256rmi:  case X86::VPCMPQZ256rmik:
610   case X86::VPCMPQZ256rmib: case X86::VPCMPQZ256rmibk:
611   case X86::VPCMPQZ256rri:  case X86::VPCMPQZ256rrik:
612   case X86::VPCMPQZrmi:     case X86::VPCMPQZrmik:
613   case X86::VPCMPQZrmib:    case X86::VPCMPQZrmibk:
614   case X86::VPCMPQZrri:     case X86::VPCMPQZrrik:
615   case X86::VPCMPWZ128rmi:  case X86::VPCMPWZ128rmik:
616   case X86::VPCMPWZ128rri:  case X86::VPCMPWZ128rrik:
617   case X86::VPCMPWZ256rmi:  case X86::VPCMPWZ256rmik:
618   case X86::VPCMPWZ256rri:  case X86::VPCMPWZ256rrik:
619   case X86::VPCMPWZrmi:     case X86::VPCMPWZrmik:
620   case X86::VPCMPWZrri:     case X86::VPCMPWZrrik: {
621     // Turn immediate 0 into the VPCMPEQ instruction.
622     if (OutMI.getOperand(OutMI.getNumOperands() - 1).getImm() == 0) {
623       unsigned NewOpc;
624       switch (OutMI.getOpcode()) {
625       default: llvm_unreachable("Invalid opcode");
626       case X86::VPCMPBZ128rmi:   NewOpc = X86::VPCMPEQBZ128rm;   break;
627       case X86::VPCMPBZ128rmik:  NewOpc = X86::VPCMPEQBZ128rmk;  break;
628       case X86::VPCMPBZ128rri:   NewOpc = X86::VPCMPEQBZ128rr;   break;
629       case X86::VPCMPBZ128rrik:  NewOpc = X86::VPCMPEQBZ128rrk;  break;
630       case X86::VPCMPBZ256rmi:   NewOpc = X86::VPCMPEQBZ256rm;   break;
631       case X86::VPCMPBZ256rmik:  NewOpc = X86::VPCMPEQBZ256rmk;  break;
632       case X86::VPCMPBZ256rri:   NewOpc = X86::VPCMPEQBZ256rr;   break;
633       case X86::VPCMPBZ256rrik:  NewOpc = X86::VPCMPEQBZ256rrk;  break;
634       case X86::VPCMPBZrmi:      NewOpc = X86::VPCMPEQBZrm;      break;
635       case X86::VPCMPBZrmik:     NewOpc = X86::VPCMPEQBZrmk;     break;
636       case X86::VPCMPBZrri:      NewOpc = X86::VPCMPEQBZrr;      break;
637       case X86::VPCMPBZrrik:     NewOpc = X86::VPCMPEQBZrrk;     break;
638       case X86::VPCMPDZ128rmi:   NewOpc = X86::VPCMPEQDZ128rm;   break;
639       case X86::VPCMPDZ128rmib:  NewOpc = X86::VPCMPEQDZ128rmb;  break;
640       case X86::VPCMPDZ128rmibk: NewOpc = X86::VPCMPEQDZ128rmbk; break;
641       case X86::VPCMPDZ128rmik:  NewOpc = X86::VPCMPEQDZ128rmk;  break;
642       case X86::VPCMPDZ128rri:   NewOpc = X86::VPCMPEQDZ128rr;   break;
643       case X86::VPCMPDZ128rrik:  NewOpc = X86::VPCMPEQDZ128rrk;  break;
644       case X86::VPCMPDZ256rmi:   NewOpc = X86::VPCMPEQDZ256rm;   break;
645       case X86::VPCMPDZ256rmib:  NewOpc = X86::VPCMPEQDZ256rmb;  break;
646       case X86::VPCMPDZ256rmibk: NewOpc = X86::VPCMPEQDZ256rmbk; break;
647       case X86::VPCMPDZ256rmik:  NewOpc = X86::VPCMPEQDZ256rmk;  break;
648       case X86::VPCMPDZ256rri:   NewOpc = X86::VPCMPEQDZ256rr;   break;
649       case X86::VPCMPDZ256rrik:  NewOpc = X86::VPCMPEQDZ256rrk;  break;
650       case X86::VPCMPDZrmi:      NewOpc = X86::VPCMPEQDZrm;      break;
651       case X86::VPCMPDZrmib:     NewOpc = X86::VPCMPEQDZrmb;     break;
652       case X86::VPCMPDZrmibk:    NewOpc = X86::VPCMPEQDZrmbk;    break;
653       case X86::VPCMPDZrmik:     NewOpc = X86::VPCMPEQDZrmk;     break;
654       case X86::VPCMPDZrri:      NewOpc = X86::VPCMPEQDZrr;      break;
655       case X86::VPCMPDZrrik:     NewOpc = X86::VPCMPEQDZrrk;     break;
656       case X86::VPCMPQZ128rmi:   NewOpc = X86::VPCMPEQQZ128rm;   break;
657       case X86::VPCMPQZ128rmib:  NewOpc = X86::VPCMPEQQZ128rmb;  break;
658       case X86::VPCMPQZ128rmibk: NewOpc = X86::VPCMPEQQZ128rmbk; break;
659       case X86::VPCMPQZ128rmik:  NewOpc = X86::VPCMPEQQZ128rmk;  break;
660       case X86::VPCMPQZ128rri:   NewOpc = X86::VPCMPEQQZ128rr;   break;
661       case X86::VPCMPQZ128rrik:  NewOpc = X86::VPCMPEQQZ128rrk;  break;
662       case X86::VPCMPQZ256rmi:   NewOpc = X86::VPCMPEQQZ256rm;   break;
663       case X86::VPCMPQZ256rmib:  NewOpc = X86::VPCMPEQQZ256rmb;  break;
664       case X86::VPCMPQZ256rmibk: NewOpc = X86::VPCMPEQQZ256rmbk; break;
665       case X86::VPCMPQZ256rmik:  NewOpc = X86::VPCMPEQQZ256rmk;  break;
666       case X86::VPCMPQZ256rri:   NewOpc = X86::VPCMPEQQZ256rr;   break;
667       case X86::VPCMPQZ256rrik:  NewOpc = X86::VPCMPEQQZ256rrk;  break;
668       case X86::VPCMPQZrmi:      NewOpc = X86::VPCMPEQQZrm;      break;
669       case X86::VPCMPQZrmib:     NewOpc = X86::VPCMPEQQZrmb;     break;
670       case X86::VPCMPQZrmibk:    NewOpc = X86::VPCMPEQQZrmbk;    break;
671       case X86::VPCMPQZrmik:     NewOpc = X86::VPCMPEQQZrmk;     break;
672       case X86::VPCMPQZrri:      NewOpc = X86::VPCMPEQQZrr;      break;
673       case X86::VPCMPQZrrik:     NewOpc = X86::VPCMPEQQZrrk;     break;
674       case X86::VPCMPWZ128rmi:   NewOpc = X86::VPCMPEQWZ128rm;   break;
675       case X86::VPCMPWZ128rmik:  NewOpc = X86::VPCMPEQWZ128rmk;  break;
676       case X86::VPCMPWZ128rri:   NewOpc = X86::VPCMPEQWZ128rr;   break;
677       case X86::VPCMPWZ128rrik:  NewOpc = X86::VPCMPEQWZ128rrk;  break;
678       case X86::VPCMPWZ256rmi:   NewOpc = X86::VPCMPEQWZ256rm;   break;
679       case X86::VPCMPWZ256rmik:  NewOpc = X86::VPCMPEQWZ256rmk;  break;
680       case X86::VPCMPWZ256rri:   NewOpc = X86::VPCMPEQWZ256rr;   break;
681       case X86::VPCMPWZ256rrik:  NewOpc = X86::VPCMPEQWZ256rrk;  break;
682       case X86::VPCMPWZrmi:      NewOpc = X86::VPCMPEQWZrm;      break;
683       case X86::VPCMPWZrmik:     NewOpc = X86::VPCMPEQWZrmk;     break;
684       case X86::VPCMPWZrri:      NewOpc = X86::VPCMPEQWZrr;      break;
685       case X86::VPCMPWZrrik:     NewOpc = X86::VPCMPEQWZrrk;     break;
686       }
687 
688       OutMI.setOpcode(NewOpc);
689       OutMI.erase(&OutMI.getOperand(OutMI.getNumOperands() - 1));
690       break;
691     }
692 
693     // Turn immediate 6 into the VPCMPGT instruction.
694     if (OutMI.getOperand(OutMI.getNumOperands() - 1).getImm() == 6) {
695       unsigned NewOpc;
696       switch (OutMI.getOpcode()) {
697       default: llvm_unreachable("Invalid opcode");
698       case X86::VPCMPBZ128rmi:   NewOpc = X86::VPCMPGTBZ128rm;   break;
699       case X86::VPCMPBZ128rmik:  NewOpc = X86::VPCMPGTBZ128rmk;  break;
700       case X86::VPCMPBZ128rri:   NewOpc = X86::VPCMPGTBZ128rr;   break;
701       case X86::VPCMPBZ128rrik:  NewOpc = X86::VPCMPGTBZ128rrk;  break;
702       case X86::VPCMPBZ256rmi:   NewOpc = X86::VPCMPGTBZ256rm;   break;
703       case X86::VPCMPBZ256rmik:  NewOpc = X86::VPCMPGTBZ256rmk;  break;
704       case X86::VPCMPBZ256rri:   NewOpc = X86::VPCMPGTBZ256rr;   break;
705       case X86::VPCMPBZ256rrik:  NewOpc = X86::VPCMPGTBZ256rrk;  break;
706       case X86::VPCMPBZrmi:      NewOpc = X86::VPCMPGTBZrm;      break;
707       case X86::VPCMPBZrmik:     NewOpc = X86::VPCMPGTBZrmk;     break;
708       case X86::VPCMPBZrri:      NewOpc = X86::VPCMPGTBZrr;      break;
709       case X86::VPCMPBZrrik:     NewOpc = X86::VPCMPGTBZrrk;     break;
710       case X86::VPCMPDZ128rmi:   NewOpc = X86::VPCMPGTDZ128rm;   break;
711       case X86::VPCMPDZ128rmib:  NewOpc = X86::VPCMPGTDZ128rmb;  break;
712       case X86::VPCMPDZ128rmibk: NewOpc = X86::VPCMPGTDZ128rmbk; break;
713       case X86::VPCMPDZ128rmik:  NewOpc = X86::VPCMPGTDZ128rmk;  break;
714       case X86::VPCMPDZ128rri:   NewOpc = X86::VPCMPGTDZ128rr;   break;
715       case X86::VPCMPDZ128rrik:  NewOpc = X86::VPCMPGTDZ128rrk;  break;
716       case X86::VPCMPDZ256rmi:   NewOpc = X86::VPCMPGTDZ256rm;   break;
717       case X86::VPCMPDZ256rmib:  NewOpc = X86::VPCMPGTDZ256rmb;  break;
718       case X86::VPCMPDZ256rmibk: NewOpc = X86::VPCMPGTDZ256rmbk; break;
719       case X86::VPCMPDZ256rmik:  NewOpc = X86::VPCMPGTDZ256rmk;  break;
720       case X86::VPCMPDZ256rri:   NewOpc = X86::VPCMPGTDZ256rr;   break;
721       case X86::VPCMPDZ256rrik:  NewOpc = X86::VPCMPGTDZ256rrk;  break;
722       case X86::VPCMPDZrmi:      NewOpc = X86::VPCMPGTDZrm;      break;
723       case X86::VPCMPDZrmib:     NewOpc = X86::VPCMPGTDZrmb;     break;
724       case X86::VPCMPDZrmibk:    NewOpc = X86::VPCMPGTDZrmbk;    break;
725       case X86::VPCMPDZrmik:     NewOpc = X86::VPCMPGTDZrmk;     break;
726       case X86::VPCMPDZrri:      NewOpc = X86::VPCMPGTDZrr;      break;
727       case X86::VPCMPDZrrik:     NewOpc = X86::VPCMPGTDZrrk;     break;
728       case X86::VPCMPQZ128rmi:   NewOpc = X86::VPCMPGTQZ128rm;   break;
729       case X86::VPCMPQZ128rmib:  NewOpc = X86::VPCMPGTQZ128rmb;  break;
730       case X86::VPCMPQZ128rmibk: NewOpc = X86::VPCMPGTQZ128rmbk; break;
731       case X86::VPCMPQZ128rmik:  NewOpc = X86::VPCMPGTQZ128rmk;  break;
732       case X86::VPCMPQZ128rri:   NewOpc = X86::VPCMPGTQZ128rr;   break;
733       case X86::VPCMPQZ128rrik:  NewOpc = X86::VPCMPGTQZ128rrk;  break;
734       case X86::VPCMPQZ256rmi:   NewOpc = X86::VPCMPGTQZ256rm;   break;
735       case X86::VPCMPQZ256rmib:  NewOpc = X86::VPCMPGTQZ256rmb;  break;
736       case X86::VPCMPQZ256rmibk: NewOpc = X86::VPCMPGTQZ256rmbk; break;
737       case X86::VPCMPQZ256rmik:  NewOpc = X86::VPCMPGTQZ256rmk;  break;
738       case X86::VPCMPQZ256rri:   NewOpc = X86::VPCMPGTQZ256rr;   break;
739       case X86::VPCMPQZ256rrik:  NewOpc = X86::VPCMPGTQZ256rrk;  break;
740       case X86::VPCMPQZrmi:      NewOpc = X86::VPCMPGTQZrm;      break;
741       case X86::VPCMPQZrmib:     NewOpc = X86::VPCMPGTQZrmb;     break;
742       case X86::VPCMPQZrmibk:    NewOpc = X86::VPCMPGTQZrmbk;    break;
743       case X86::VPCMPQZrmik:     NewOpc = X86::VPCMPGTQZrmk;     break;
744       case X86::VPCMPQZrri:      NewOpc = X86::VPCMPGTQZrr;      break;
745       case X86::VPCMPQZrrik:     NewOpc = X86::VPCMPGTQZrrk;     break;
746       case X86::VPCMPWZ128rmi:   NewOpc = X86::VPCMPGTWZ128rm;   break;
747       case X86::VPCMPWZ128rmik:  NewOpc = X86::VPCMPGTWZ128rmk;  break;
748       case X86::VPCMPWZ128rri:   NewOpc = X86::VPCMPGTWZ128rr;   break;
749       case X86::VPCMPWZ128rrik:  NewOpc = X86::VPCMPGTWZ128rrk;  break;
750       case X86::VPCMPWZ256rmi:   NewOpc = X86::VPCMPGTWZ256rm;   break;
751       case X86::VPCMPWZ256rmik:  NewOpc = X86::VPCMPGTWZ256rmk;  break;
752       case X86::VPCMPWZ256rri:   NewOpc = X86::VPCMPGTWZ256rr;   break;
753       case X86::VPCMPWZ256rrik:  NewOpc = X86::VPCMPGTWZ256rrk;  break;
754       case X86::VPCMPWZrmi:      NewOpc = X86::VPCMPGTWZrm;      break;
755       case X86::VPCMPWZrmik:     NewOpc = X86::VPCMPGTWZrmk;     break;
756       case X86::VPCMPWZrri:      NewOpc = X86::VPCMPGTWZrr;      break;
757       case X86::VPCMPWZrrik:     NewOpc = X86::VPCMPGTWZrrk;     break;
758       }
759 
760       OutMI.setOpcode(NewOpc);
761       OutMI.erase(&OutMI.getOperand(OutMI.getNumOperands() - 1));
762       break;
763     }
764 
765     break;
766   }
767 
  // CALL64r, CALL64pcrel32 - These instructions used to have
  // register inputs modeled as normal uses instead of implicit uses.  As such,
  // we used to truncate off all but the first operand (the callee). This
  // issue seems to have been fixed at some point. This assert verifies that.
772   case X86::CALL64r:
773   case X86::CALL64pcrel32:
774     assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
775     break;
776 
777   case X86::EH_RETURN:
778   case X86::EH_RETURN64: {
779     OutMI = MCInst();
780     OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
781     break;
782   }
783 
784   case X86::CLEANUPRET: {
785     // Replace CLEANUPRET with the appropriate RET.
786     OutMI = MCInst();
787     OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
788     break;
789   }
790 
791   case X86::CATCHRET: {
792     // Replace CATCHRET with the appropriate RET.
793     const X86Subtarget &Subtarget = AsmPrinter.getSubtarget();
794     unsigned ReturnReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
795     OutMI = MCInst();
796     OutMI.setOpcode(getRetOpcode(Subtarget));
797     OutMI.addOperand(MCOperand::createReg(ReturnReg));
798     break;
799   }
800 
801   // TAILJMPd, TAILJMPd64, TailJMPd_cc - Lower to the correct jump
802   // instruction.
803   case X86::TAILJMPr:
804   case X86::TAILJMPr64:
805   case X86::TAILJMPr64_REX:
806   case X86::TAILJMPd:
807   case X86::TAILJMPd64:
808     assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
809     OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
810     break;
811 
812   case X86::TAILJMPd_CC:
813   case X86::TAILJMPd64_CC:
814     assert(OutMI.getNumOperands() == 2 && "Unexpected number of operands!");
815     OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
816     break;
817 
818   case X86::TAILJMPm:
819   case X86::TAILJMPm64:
820   case X86::TAILJMPm64_REX:
821     assert(OutMI.getNumOperands() == X86::AddrNumOperands &&
822            "Unexpected number of operands!");
823     OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
824     break;
825 
826   case X86::DEC16r:
827   case X86::DEC32r:
828   case X86::INC16r:
829   case X86::INC32r:
830     // If we aren't in 64-bit mode we can use the 1-byte inc/dec instructions.
831     if (!AsmPrinter.getSubtarget().is64Bit()) {
832       unsigned Opcode;
833       switch (OutMI.getOpcode()) {
834       default: llvm_unreachable("Invalid opcode");
835       case X86::DEC16r: Opcode = X86::DEC16r_alt; break;
836       case X86::DEC32r: Opcode = X86::DEC32r_alt; break;
837       case X86::INC16r: Opcode = X86::INC16r_alt; break;
838       case X86::INC32r: Opcode = X86::INC32r_alt; break;
839       }
840       OutMI.setOpcode(Opcode);
841     }
842     break;
843 
844   // We don't currently select the correct instruction form for instructions
845   // which have a short %eax, etc. form. Handle this by custom lowering, for
846   // now.
847   //
848   // Note, we are currently not handling the following instructions:
849   // MOV64ao8, MOV64o8a
850   // XCHG16ar, XCHG32ar, XCHG64ar
851   case X86::MOV8mr_NOREX:
852   case X86::MOV8mr:
853   case X86::MOV8rm_NOREX:
854   case X86::MOV8rm:
855   case X86::MOV16mr:
856   case X86::MOV16rm:
857   case X86::MOV32mr:
858   case X86::MOV32rm: {
859     unsigned NewOpc;
860     switch (OutMI.getOpcode()) {
861     default: llvm_unreachable("Invalid opcode");
862     case X86::MOV8mr_NOREX:
863     case X86::MOV8mr:  NewOpc = X86::MOV8o32a; break;
864     case X86::MOV8rm_NOREX:
865     case X86::MOV8rm:  NewOpc = X86::MOV8ao32; break;
866     case X86::MOV16mr: NewOpc = X86::MOV16o32a; break;
867     case X86::MOV16rm: NewOpc = X86::MOV16ao32; break;
868     case X86::MOV32mr: NewOpc = X86::MOV32o32a; break;
869     case X86::MOV32rm: NewOpc = X86::MOV32ao32; break;
870     }
871     SimplifyShortMoveForm(AsmPrinter, OutMI, NewOpc);
872     break;
873   }
874 
875   case X86::ADC8ri: case X86::ADC16ri: case X86::ADC32ri: case X86::ADC64ri32:
876   case X86::ADD8ri: case X86::ADD16ri: case X86::ADD32ri: case X86::ADD64ri32:
877   case X86::AND8ri: case X86::AND16ri: case X86::AND32ri: case X86::AND64ri32:
878   case X86::CMP8ri: case X86::CMP16ri: case X86::CMP32ri: case X86::CMP64ri32:
879   case X86::OR8ri:  case X86::OR16ri:  case X86::OR32ri:  case X86::OR64ri32:
880   case X86::SBB8ri: case X86::SBB16ri: case X86::SBB32ri: case X86::SBB64ri32:
881   case X86::SUB8ri: case X86::SUB16ri: case X86::SUB32ri: case X86::SUB64ri32:
882   case X86::TEST8ri:case X86::TEST16ri:case X86::TEST32ri:case X86::TEST64ri32:
883   case X86::XOR8ri: case X86::XOR16ri: case X86::XOR32ri: case X86::XOR64ri32: {
884     unsigned NewOpc;
885     switch (OutMI.getOpcode()) {
886     default: llvm_unreachable("Invalid opcode");
887     case X86::ADC8ri:     NewOpc = X86::ADC8i8;    break;
888     case X86::ADC16ri:    NewOpc = X86::ADC16i16;  break;
889     case X86::ADC32ri:    NewOpc = X86::ADC32i32;  break;
890     case X86::ADC64ri32:  NewOpc = X86::ADC64i32;  break;
891     case X86::ADD8ri:     NewOpc = X86::ADD8i8;    break;
892     case X86::ADD16ri:    NewOpc = X86::ADD16i16;  break;
893     case X86::ADD32ri:    NewOpc = X86::ADD32i32;  break;
894     case X86::ADD64ri32:  NewOpc = X86::ADD64i32;  break;
895     case X86::AND8ri:     NewOpc = X86::AND8i8;    break;
896     case X86::AND16ri:    NewOpc = X86::AND16i16;  break;
897     case X86::AND32ri:    NewOpc = X86::AND32i32;  break;
898     case X86::AND64ri32:  NewOpc = X86::AND64i32;  break;
899     case X86::CMP8ri:     NewOpc = X86::CMP8i8;    break;
900     case X86::CMP16ri:    NewOpc = X86::CMP16i16;  break;
901     case X86::CMP32ri:    NewOpc = X86::CMP32i32;  break;
902     case X86::CMP64ri32:  NewOpc = X86::CMP64i32;  break;
903     case X86::OR8ri:      NewOpc = X86::OR8i8;     break;
904     case X86::OR16ri:     NewOpc = X86::OR16i16;   break;
905     case X86::OR32ri:     NewOpc = X86::OR32i32;   break;
906     case X86::OR64ri32:   NewOpc = X86::OR64i32;   break;
907     case X86::SBB8ri:     NewOpc = X86::SBB8i8;    break;
908     case X86::SBB16ri:    NewOpc = X86::SBB16i16;  break;
909     case X86::SBB32ri:    NewOpc = X86::SBB32i32;  break;
910     case X86::SBB64ri32:  NewOpc = X86::SBB64i32;  break;
911     case X86::SUB8ri:     NewOpc = X86::SUB8i8;    break;
912     case X86::SUB16ri:    NewOpc = X86::SUB16i16;  break;
913     case X86::SUB32ri:    NewOpc = X86::SUB32i32;  break;
914     case X86::SUB64ri32:  NewOpc = X86::SUB64i32;  break;
915     case X86::TEST8ri:    NewOpc = X86::TEST8i8;   break;
916     case X86::TEST16ri:   NewOpc = X86::TEST16i16; break;
917     case X86::TEST32ri:   NewOpc = X86::TEST32i32; break;
918     case X86::TEST64ri32: NewOpc = X86::TEST64i32; break;
919     case X86::XOR8ri:     NewOpc = X86::XOR8i8;    break;
920     case X86::XOR16ri:    NewOpc = X86::XOR16i16;  break;
921     case X86::XOR32ri:    NewOpc = X86::XOR32i32;  break;
922     case X86::XOR64ri32:  NewOpc = X86::XOR64i32;  break;
923     }
924     SimplifyShortImmForm(OutMI, NewOpc);
925     break;
926   }
927 
928   // Try to shrink some forms of movsx.
929   case X86::MOVSX16rr8:
930   case X86::MOVSX32rr16:
931   case X86::MOVSX64rr32:
932     SimplifyMOVSX(OutMI);
933     break;
934 
935   case X86::VCMPPDrri:
936   case X86::VCMPPDYrri:
937   case X86::VCMPPSrri:
938   case X86::VCMPPSYrri:
939   case X86::VCMPSDrr:
940   case X86::VCMPSSrr: {
941     // Swap the operands if it will enable a 2 byte VEX encoding.
942     // FIXME: Change the immediate to improve opportunities?
943     if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg()) &&
944         X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) {
945       unsigned Imm = MI->getOperand(3).getImm() & 0x7;
946       switch (Imm) {
947       default: break;
948       case 0x00: // EQUAL
949       case 0x03: // UNORDERED
950       case 0x04: // NOT EQUAL
951       case 0x07: // ORDERED
952         std::swap(OutMI.getOperand(1), OutMI.getOperand(2));
953         break;
954       }
955     }
956     break;
957   }
958 
959   case X86::VMOVHLPSrr:
960   case X86::VUNPCKHPDrr:
961     // These are not truly commutable so hide them from the default case.
962     break;
963 
964   case X86::MASKMOVDQU:
965   case X86::VMASKMOVDQU:
966     if (AsmPrinter.getSubtarget().is64Bit())
967       OutMI.setFlags(X86::IP_HAS_AD_SIZE);
968     break;
969 
970   default: {
971     // If the instruction is a commutable arithmetic instruction we might be
972     // able to commute the operands to get a 2 byte VEX prefix.
973     uint64_t TSFlags = MI->getDesc().TSFlags;
974     if (MI->getDesc().isCommutable() &&
975         (TSFlags & X86II::EncodingMask) == X86II::VEX &&
976         (TSFlags & X86II::OpMapMask) == X86II::TB &&
977         (TSFlags & X86II::FormMask) == X86II::MRMSrcReg &&
978         !(TSFlags & X86II::VEX_W) && (TSFlags & X86II::VEX_4V) &&
979         OutMI.getNumOperands() == 3) {
980       if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg()) &&
981           X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg()))
982         std::swap(OutMI.getOperand(1), OutMI.getOperand(2));
983     }
984     // Add an REP prefix to BSF instructions so that new processors can
985     // recognize as TZCNT, which has better performance than BSF.
986     if (X86::isBSF(OutMI.getOpcode()) && !MF.getFunction().hasOptSize()) {
987       // BSF and TZCNT have different interpretations on ZF bit. So make sure
988       // it won't be used later.
989       const MachineOperand *FlagDef = MI->findRegisterDefOperand(X86::EFLAGS);
990       if (FlagDef && FlagDef->isDead())
991         OutMI.setFlags(X86::IP_HAS_REPEAT);
992     }
993     break;
994   }
995   }
996 }
997 
998 void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
999                                  const MachineInstr &MI) {
1000   NoAutoPaddingScope NoPadScope(*OutStreamer);
1001   bool Is64Bits = MI.getOpcode() != X86::TLS_addr32 &&
1002                   MI.getOpcode() != X86::TLS_base_addr32;
1003   bool Is64BitsLP64 = MI.getOpcode() == X86::TLS_addr64 ||
1004                       MI.getOpcode() == X86::TLS_base_addr64;
1005   MCContext &Ctx = OutStreamer->getContext();
1006 
1007   MCSymbolRefExpr::VariantKind SRVK;
1008   switch (MI.getOpcode()) {
1009   case X86::TLS_addr32:
1010   case X86::TLS_addr64:
1011   case X86::TLS_addrX32:
1012     SRVK = MCSymbolRefExpr::VK_TLSGD;
1013     break;
1014   case X86::TLS_base_addr32:
1015     SRVK = MCSymbolRefExpr::VK_TLSLDM;
1016     break;
1017   case X86::TLS_base_addr64:
1018   case X86::TLS_base_addrX32:
1019     SRVK = MCSymbolRefExpr::VK_TLSLD;
1020     break;
1021   default:
1022     llvm_unreachable("unexpected opcode");
1023   }
1024 
1025   const MCSymbolRefExpr *Sym = MCSymbolRefExpr::create(
1026       MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)), SRVK, Ctx);
1027 
1028   // As of binutils 2.32, ld has a bogus TLS relaxation error when the GD/LD
1029   // code sequence using R_X86_64_GOTPCREL (instead of R_X86_64_GOTPCRELX) is
1030   // attempted to be relaxed to IE/LE (binutils PR24784). Work around the bug by
1031   // only using GOT when GOTPCRELX is enabled.
1032   // TODO Delete the workaround when GOTPCRELX becomes commonplace.
1033   bool UseGot = MMI->getModule()->getRtLibUseGOT() &&
1034                 Ctx.getAsmInfo()->canRelaxRelocations();
1035 
1036   if (Is64Bits) {
1037     bool NeedsPadding = SRVK == MCSymbolRefExpr::VK_TLSGD;
1038     if (NeedsPadding && Is64BitsLP64)
1039       EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
1040     EmitAndCountInstruction(MCInstBuilder(X86::LEA64r)
1041                                 .addReg(X86::RDI)
1042                                 .addReg(X86::RIP)
1043                                 .addImm(1)
1044                                 .addReg(0)
1045                                 .addExpr(Sym)
1046                                 .addReg(0));
1047     const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("__tls_get_addr");
1048     if (NeedsPadding) {
1049       if (!UseGot)
1050         EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
1051       EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
1052       EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX));
1053     }
1054     if (UseGot) {
1055       const MCExpr *Expr = MCSymbolRefExpr::create(
1056           TlsGetAddr, MCSymbolRefExpr::VK_GOTPCREL, Ctx);
1057       EmitAndCountInstruction(MCInstBuilder(X86::CALL64m)
1058                                   .addReg(X86::RIP)
1059                                   .addImm(1)
1060                                   .addReg(0)
1061                                   .addExpr(Expr)
1062                                   .addReg(0));
1063     } else {
1064       EmitAndCountInstruction(
1065           MCInstBuilder(X86::CALL64pcrel32)
1066               .addExpr(MCSymbolRefExpr::create(TlsGetAddr,
1067                                                MCSymbolRefExpr::VK_PLT, Ctx)));
1068     }
1069   } else {
1070     if (SRVK == MCSymbolRefExpr::VK_TLSGD && !UseGot) {
1071       EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
1072                                   .addReg(X86::EAX)
1073                                   .addReg(0)
1074                                   .addImm(1)
1075                                   .addReg(X86::EBX)
1076                                   .addExpr(Sym)
1077                                   .addReg(0));
1078     } else {
1079       EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
1080                                   .addReg(X86::EAX)
1081                                   .addReg(X86::EBX)
1082                                   .addImm(1)
1083                                   .addReg(0)
1084                                   .addExpr(Sym)
1085                                   .addReg(0));
1086     }
1087 
1088     const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("___tls_get_addr");
1089     if (UseGot) {
1090       const MCExpr *Expr =
1091           MCSymbolRefExpr::create(TlsGetAddr, MCSymbolRefExpr::VK_GOT, Ctx);
1092       EmitAndCountInstruction(MCInstBuilder(X86::CALL32m)
1093                                   .addReg(X86::EBX)
1094                                   .addImm(1)
1095                                   .addReg(0)
1096                                   .addExpr(Expr)
1097                                   .addReg(0));
1098     } else {
1099       EmitAndCountInstruction(
1100           MCInstBuilder(X86::CALLpcrel32)
1101               .addExpr(MCSymbolRefExpr::create(TlsGetAddr,
1102                                                MCSymbolRefExpr::VK_PLT, Ctx)));
1103     }
1104   }
1105 }
1106 
1107 /// Emit the largest nop instruction smaller than or equal to \p NumBytes
1108 /// bytes.  Return the size of nop emitted.
1109 static unsigned emitNop(MCStreamer &OS, unsigned NumBytes,
1110                         const X86Subtarget *Subtarget) {
1111   // Determine the longest nop which can be efficiently decoded for the given
1112   // target cpu.  15-bytes is the longest single NOP instruction, but some
1113   // platforms can't decode the longest forms efficiently.
1114   unsigned MaxNopLength = 1;
1115   if (Subtarget->is64Bit()) {
1116     // FIXME: We can use NOOPL on 32-bit targets with FeatureNOPL, but the
1117     // IndexReg/BaseReg below need to be updated.
1118     if (Subtarget->hasFeature(X86::TuningFast7ByteNOP))
1119       MaxNopLength = 7;
1120     else if (Subtarget->hasFeature(X86::TuningFast15ByteNOP))
1121       MaxNopLength = 15;
1122     else if (Subtarget->hasFeature(X86::TuningFast11ByteNOP))
1123       MaxNopLength = 11;
1124     else
1125       MaxNopLength = 10;
1126   } if (Subtarget->is32Bit())
1127     MaxNopLength = 2;
1128 
1129   // Cap a single nop emission at the profitable value for the target
1130   NumBytes = std::min(NumBytes, MaxNopLength);
1131 
1132   unsigned NopSize;
1133   unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg;
1134   IndexReg = Displacement = SegmentReg = 0;
1135   BaseReg = X86::RAX;
1136   ScaleVal = 1;
1137   switch (NumBytes) {
1138   case 0:
1139     llvm_unreachable("Zero nops?");
1140     break;
1141   case 1:
1142     NopSize = 1;
1143     Opc = X86::NOOP;
1144     break;
1145   case 2:
1146     NopSize = 2;
1147     Opc = X86::XCHG16ar;
1148     break;
1149   case 3:
1150     NopSize = 3;
1151     Opc = X86::NOOPL;
1152     break;
1153   case 4:
1154     NopSize = 4;
1155     Opc = X86::NOOPL;
1156     Displacement = 8;
1157     break;
1158   case 5:
1159     NopSize = 5;
1160     Opc = X86::NOOPL;
1161     Displacement = 8;
1162     IndexReg = X86::RAX;
1163     break;
1164   case 6:
1165     NopSize = 6;
1166     Opc = X86::NOOPW;
1167     Displacement = 8;
1168     IndexReg = X86::RAX;
1169     break;
1170   case 7:
1171     NopSize = 7;
1172     Opc = X86::NOOPL;
1173     Displacement = 512;
1174     break;
1175   case 8:
1176     NopSize = 8;
1177     Opc = X86::NOOPL;
1178     Displacement = 512;
1179     IndexReg = X86::RAX;
1180     break;
1181   case 9:
1182     NopSize = 9;
1183     Opc = X86::NOOPW;
1184     Displacement = 512;
1185     IndexReg = X86::RAX;
1186     break;
1187   default:
1188     NopSize = 10;
1189     Opc = X86::NOOPW;
1190     Displacement = 512;
1191     IndexReg = X86::RAX;
1192     SegmentReg = X86::CS;
1193     break;
1194   }
1195 
1196   unsigned NumPrefixes = std::min(NumBytes - NopSize, 5U);
1197   NopSize += NumPrefixes;
1198   for (unsigned i = 0; i != NumPrefixes; ++i)
1199     OS.emitBytes("\x66");
1200 
1201   switch (Opc) {
1202   default: llvm_unreachable("Unexpected opcode");
1203   case X86::NOOP:
1204     OS.emitInstruction(MCInstBuilder(Opc), *Subtarget);
1205     break;
1206   case X86::XCHG16ar:
1207     OS.emitInstruction(MCInstBuilder(Opc).addReg(X86::AX).addReg(X86::AX),
1208                        *Subtarget);
1209     break;
1210   case X86::NOOPL:
1211   case X86::NOOPW:
1212     OS.emitInstruction(MCInstBuilder(Opc)
1213                            .addReg(BaseReg)
1214                            .addImm(ScaleVal)
1215                            .addReg(IndexReg)
1216                            .addImm(Displacement)
1217                            .addReg(SegmentReg),
1218                        *Subtarget);
1219     break;
1220   }
1221   assert(NopSize <= NumBytes && "We overemitted?");
1222   return NopSize;
1223 }
1224 
1225 /// Emit the optimal amount of multi-byte nops on X86.
1226 static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
1227                         const X86Subtarget *Subtarget) {
1228   unsigned NopsToEmit = NumBytes;
1229   (void)NopsToEmit;
1230   while (NumBytes) {
1231     NumBytes -= emitNop(OS, NumBytes, Subtarget);
1232     assert(NopsToEmit >= NumBytes && "Emitted more than I asked for!");
1233   }
1234 }
1235 
1236 void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
1237                                     X86MCInstLower &MCIL) {
1238   assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64");
1239 
1240   NoAutoPaddingScope NoPadScope(*OutStreamer);
1241 
1242   StatepointOpers SOpers(&MI);
1243   if (unsigned PatchBytes = SOpers.getNumPatchBytes()) {
1244     emitX86Nops(*OutStreamer, PatchBytes, Subtarget);
1245   } else {
1246     // Lower call target and choose correct opcode
1247     const MachineOperand &CallTarget = SOpers.getCallTarget();
1248     MCOperand CallTargetMCOp;
1249     unsigned CallOpcode;
1250     switch (CallTarget.getType()) {
1251     case MachineOperand::MO_GlobalAddress:
1252     case MachineOperand::MO_ExternalSymbol:
1253       CallTargetMCOp = MCIL.LowerSymbolOperand(
1254           CallTarget, MCIL.GetSymbolFromOperand(CallTarget));
1255       CallOpcode = X86::CALL64pcrel32;
1256       // Currently, we only support relative addressing with statepoints.
1257       // Otherwise, we'll need a scratch register to hold the target
1258       // address.  You'll fail asserts during load & relocation if this
1259       // symbol is to far away. (TODO: support non-relative addressing)
1260       break;
1261     case MachineOperand::MO_Immediate:
1262       CallTargetMCOp = MCOperand::createImm(CallTarget.getImm());
1263       CallOpcode = X86::CALL64pcrel32;
1264       // Currently, we only support relative addressing with statepoints.
1265       // Otherwise, we'll need a scratch register to hold the target
1266       // immediate.  You'll fail asserts during load & relocation if this
1267       // address is to far away. (TODO: support non-relative addressing)
1268       break;
1269     case MachineOperand::MO_Register:
1270       // FIXME: Add retpoline support and remove this.
1271       if (Subtarget->useIndirectThunkCalls())
1272         report_fatal_error("Lowering register statepoints with thunks not "
1273                            "yet implemented.");
1274       CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());
1275       CallOpcode = X86::CALL64r;
1276       break;
1277     default:
1278       llvm_unreachable("Unsupported operand type in statepoint call target");
1279       break;
1280     }
1281 
1282     // Emit call
1283     MCInst CallInst;
1284     CallInst.setOpcode(CallOpcode);
1285     CallInst.addOperand(CallTargetMCOp);
1286     OutStreamer->emitInstruction(CallInst, getSubtargetInfo());
1287   }
1288 
1289   // Record our statepoint node in the same section used by STACKMAP
1290   // and PATCHPOINT
1291   auto &Ctx = OutStreamer->getContext();
1292   MCSymbol *MILabel = Ctx.createTempSymbol();
1293   OutStreamer->emitLabel(MILabel);
1294   SM.recordStatepoint(*MILabel, MI);
1295 }
1296 
1297 void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
1298                                      X86MCInstLower &MCIL) {
1299   // FAULTING_LOAD_OP <def>, <faltinf type>, <MBB handler>,
1300   //                  <opcode>, <operands>
1301 
1302   NoAutoPaddingScope NoPadScope(*OutStreamer);
1303 
1304   Register DefRegister = FaultingMI.getOperand(0).getReg();
1305   FaultMaps::FaultKind FK =
1306       static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm());
1307   MCSymbol *HandlerLabel = FaultingMI.getOperand(2).getMBB()->getSymbol();
1308   unsigned Opcode = FaultingMI.getOperand(3).getImm();
1309   unsigned OperandsBeginIdx = 4;
1310 
1311   auto &Ctx = OutStreamer->getContext();
1312   MCSymbol *FaultingLabel = Ctx.createTempSymbol();
1313   OutStreamer->emitLabel(FaultingLabel);
1314 
1315   assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!");
1316   FM.recordFaultingOp(FK, FaultingLabel, HandlerLabel);
1317 
1318   MCInst MI;
1319   MI.setOpcode(Opcode);
1320 
1321   if (DefRegister != X86::NoRegister)
1322     MI.addOperand(MCOperand::createReg(DefRegister));
1323 
1324   for (const MachineOperand &MO :
1325        llvm::drop_begin(FaultingMI.operands(), OperandsBeginIdx))
1326     if (auto MaybeOperand = MCIL.LowerMachineOperand(&FaultingMI, MO))
1327       MI.addOperand(*MaybeOperand);
1328 
1329   OutStreamer->AddComment("on-fault: " + HandlerLabel->getName());
1330   OutStreamer->emitInstruction(MI, getSubtargetInfo());
1331 }
1332 
1333 void X86AsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI,
1334                                      X86MCInstLower &MCIL) {
1335   bool Is64Bits = Subtarget->is64Bit();
1336   MCContext &Ctx = OutStreamer->getContext();
1337   MCSymbol *fentry = Ctx.getOrCreateSymbol("__fentry__");
1338   const MCSymbolRefExpr *Op =
1339       MCSymbolRefExpr::create(fentry, MCSymbolRefExpr::VK_None, Ctx);
1340 
1341   EmitAndCountInstruction(
1342       MCInstBuilder(Is64Bits ? X86::CALL64pcrel32 : X86::CALLpcrel32)
1343           .addExpr(Op));
1344 }
1345 
1346 void X86AsmPrinter::LowerKCFI_CHECK(const MachineInstr &MI) {
1347   assert(std::next(MI.getIterator())->isCall() &&
1348          "KCFI_CHECK not followed by a call instruction");
1349 
1350   // Adjust the offset for patchable-function-prefix. X86InstrInfo::getNop()
1351   // returns a 1-byte X86::NOOP, which means the offset is the same in
1352   // bytes.  This assumes that patchable-function-prefix is the same for all
1353   // functions.
1354   const MachineFunction &MF = *MI.getMF();
1355   int64_t PrefixNops = 0;
1356   (void)MF.getFunction()
1357       .getFnAttribute("patchable-function-prefix")
1358       .getValueAsString()
1359       .getAsInteger(10, PrefixNops);
1360 
1361   // KCFI allows indirect calls to any location that's preceded by a valid
1362   // type identifier. To avoid encoding the full constant into an instruction,
1363   // and thus emitting potential call target gadgets at each indirect call
1364   // site, load a negated constant to a register and compare that to the
1365   // expected value at the call target.
1366   const Register AddrReg = MI.getOperand(0).getReg();
1367   const uint32_t Type = MI.getOperand(1).getImm();
1368   // The check is immediately before the call. If the call target is in R10,
1369   // we can clobber R11 for the check instead.
1370   unsigned TempReg = AddrReg == X86::R10 ? X86::R11D : X86::R10D;
1371   EmitAndCountInstruction(
1372       MCInstBuilder(X86::MOV32ri).addReg(TempReg).addImm(-MaskKCFIType(Type)));
1373   EmitAndCountInstruction(MCInstBuilder(X86::ADD32rm)
1374                               .addReg(X86::NoRegister)
1375                               .addReg(TempReg)
1376                               .addReg(AddrReg)
1377                               .addImm(1)
1378                               .addReg(X86::NoRegister)
1379                               .addImm(-(PrefixNops + 4))
1380                               .addReg(X86::NoRegister));
1381 
1382   MCSymbol *Pass = OutContext.createTempSymbol();
1383   EmitAndCountInstruction(
1384       MCInstBuilder(X86::JCC_1)
1385           .addExpr(MCSymbolRefExpr::create(Pass, OutContext))
1386           .addImm(X86::COND_E));
1387 
1388   MCSymbol *Trap = OutContext.createTempSymbol();
1389   OutStreamer->emitLabel(Trap);
1390   EmitAndCountInstruction(MCInstBuilder(X86::TRAP));
1391   emitKCFITrapEntry(MF, Trap);
1392   OutStreamer->emitLabel(Pass);
1393 }
1394 
1395 void X86AsmPrinter::LowerASAN_CHECK_MEMACCESS(const MachineInstr &MI) {
1396   // FIXME: Make this work on non-ELF.
1397   if (!TM.getTargetTriple().isOSBinFormatELF()) {
1398     report_fatal_error("llvm.asan.check.memaccess only supported on ELF");
1399     return;
1400   }
1401 
1402   const auto &Reg = MI.getOperand(0).getReg();
1403   ASanAccessInfo AccessInfo(MI.getOperand(1).getImm());
1404 
1405   uint64_t ShadowBase;
1406   int MappingScale;
1407   bool OrShadowOffset;
1408   getAddressSanitizerParams(Triple(TM.getTargetTriple()), 64,
1409                             AccessInfo.CompileKernel, &ShadowBase,
1410                             &MappingScale, &OrShadowOffset);
1411 
1412   StringRef Name = AccessInfo.IsWrite ? "store" : "load";
1413   StringRef Op = OrShadowOffset ? "or" : "add";
1414   std::string SymName = ("__asan_check_" + Name + "_" + Op + "_" +
1415                          Twine(1ULL << AccessInfo.AccessSizeIndex) + "_" +
1416                          TM.getMCRegisterInfo()->getName(Reg.asMCReg()))
1417                             .str();
1418   if (OrShadowOffset)
1419     report_fatal_error(
1420         "OrShadowOffset is not supported with optimized callbacks");
1421 
1422   EmitAndCountInstruction(
1423       MCInstBuilder(X86::CALL64pcrel32)
1424           .addExpr(MCSymbolRefExpr::create(
1425               OutContext.getOrCreateSymbol(SymName), OutContext)));
1426 }
1427 
1428 void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
1429                                       X86MCInstLower &MCIL) {
1430   // PATCHABLE_OP minsize, opcode, operands
1431 
1432   NoAutoPaddingScope NoPadScope(*OutStreamer);
1433 
1434   unsigned MinSize = MI.getOperand(0).getImm();
1435   unsigned Opcode = MI.getOperand(1).getImm();
1436   // Opcode PATCHABLE_OP is a special case: there is no instruction to wrap,
1437   // simply emit a nop of size MinSize.
1438   bool EmptyInst = (Opcode == TargetOpcode::PATCHABLE_OP);
1439 
1440   MCInst MCI;
1441   MCI.setOpcode(Opcode);
1442   for (auto &MO : drop_begin(MI.operands(), 2))
1443     if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
1444       MCI.addOperand(*MaybeOperand);
1445 
1446   SmallString<256> Code;
1447   if (!EmptyInst) {
1448     SmallVector<MCFixup, 4> Fixups;
1449     raw_svector_ostream VecOS(Code);
1450     CodeEmitter->encodeInstruction(MCI, VecOS, Fixups, getSubtargetInfo());
1451   }
1452 
1453   if (Code.size() < MinSize) {
1454     if (MinSize == 2 && Subtarget->is32Bit() &&
1455         Subtarget->isTargetWindowsMSVC() &&
1456         (Subtarget->getCPU().empty() || Subtarget->getCPU() == "pentium3")) {
1457       // For compatibility reasons, when targetting MSVC, is is important to
1458       // generate a 'legacy' NOP in the form of a 8B FF MOV EDI, EDI. Some tools
1459       // rely specifically on this pattern to be able to patch a function.
1460       // This is only for 32-bit targets, when using /arch:IA32 or /arch:SSE.
1461       OutStreamer->emitInstruction(
1462           MCInstBuilder(X86::MOV32rr_REV).addReg(X86::EDI).addReg(X86::EDI),
1463           *Subtarget);
1464     } else if (MinSize == 2 && Opcode == X86::PUSH64r) {
1465       // This is an optimization that lets us get away without emitting a nop in
1466       // many cases.
1467       //
1468       // NB! In some cases the encoding for PUSH64r (e.g. PUSH64r %r9) takes two
1469       // bytes too, so the check on MinSize is important.
1470       MCI.setOpcode(X86::PUSH64rmr);
1471     } else {
1472       unsigned NopSize = emitNop(*OutStreamer, MinSize, Subtarget);
1473       assert(NopSize == MinSize && "Could not implement MinSize!");
1474       (void)NopSize;
1475     }
1476   }
1477   if (!EmptyInst)
1478     OutStreamer->emitInstruction(MCI, getSubtargetInfo());
1479 }
1480 
1481 // Lower a stackmap of the form:
1482 // <id>, <shadowBytes>, ...
1483 void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
1484   SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
1485 
1486   auto &Ctx = OutStreamer->getContext();
1487   MCSymbol *MILabel = Ctx.createTempSymbol();
1488   OutStreamer->emitLabel(MILabel);
1489 
1490   SM.recordStackMap(*MILabel, MI);
1491   unsigned NumShadowBytes = MI.getOperand(1).getImm();
1492   SMShadowTracker.reset(NumShadowBytes);
1493 }
1494 
1495 // Lower a patchpoint of the form:
1496 // [<def>], <id>, <numBytes>, <target>, <numArgs>, <cc>, ...
1497 void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
1498                                     X86MCInstLower &MCIL) {
1499   assert(Subtarget->is64Bit() && "Patchpoint currently only supports X86-64");
1500 
1501   SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
1502 
1503   NoAutoPaddingScope NoPadScope(*OutStreamer);
1504 
1505   auto &Ctx = OutStreamer->getContext();
1506   MCSymbol *MILabel = Ctx.createTempSymbol();
1507   OutStreamer->emitLabel(MILabel);
1508   SM.recordPatchPoint(*MILabel, MI);
1509 
1510   PatchPointOpers opers(&MI);
1511   unsigned ScratchIdx = opers.getNextScratchIdx();
1512   unsigned EncodedBytes = 0;
1513   const MachineOperand &CalleeMO = opers.getCallTarget();
1514 
1515   // Check for null target. If target is non-null (i.e. is non-zero or is
1516   // symbolic) then emit a call.
1517   if (!(CalleeMO.isImm() && !CalleeMO.getImm())) {
1518     MCOperand CalleeMCOp;
1519     switch (CalleeMO.getType()) {
1520     default:
1521       /// FIXME: Add a verifier check for bad callee types.
1522       llvm_unreachable("Unrecognized callee operand type.");
1523     case MachineOperand::MO_Immediate:
1524       if (CalleeMO.getImm())
1525         CalleeMCOp = MCOperand::createImm(CalleeMO.getImm());
1526       break;
1527     case MachineOperand::MO_ExternalSymbol:
1528     case MachineOperand::MO_GlobalAddress:
1529       CalleeMCOp = MCIL.LowerSymbolOperand(CalleeMO,
1530                                            MCIL.GetSymbolFromOperand(CalleeMO));
1531       break;
1532     }
1533 
1534     // Emit MOV to materialize the target address and the CALL to target.
1535     // This is encoded with 12-13 bytes, depending on which register is used.
1536     Register ScratchReg = MI.getOperand(ScratchIdx).getReg();
1537     if (X86II::isX86_64ExtendedReg(ScratchReg))
1538       EncodedBytes = 13;
1539     else
1540       EncodedBytes = 12;
1541 
1542     EmitAndCountInstruction(
1543         MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp));
1544     // FIXME: Add retpoline support and remove this.
1545     if (Subtarget->useIndirectThunkCalls())
1546       report_fatal_error(
1547           "Lowering patchpoint with thunks not yet implemented.");
1548     EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg));
1549   }
1550 
1551   // Emit padding.
1552   unsigned NumBytes = opers.getNumPatchBytes();
1553   assert(NumBytes >= EncodedBytes &&
1554          "Patchpoint can't request size less than the length of a call.");
1555 
1556   emitX86Nops(*OutStreamer, NumBytes - EncodedBytes, Subtarget);
1557 }
1558 
1559 void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
1560                                               X86MCInstLower &MCIL) {
1561   assert(Subtarget->is64Bit() && "XRay custom events only supports X86-64");
1562 
1563   NoAutoPaddingScope NoPadScope(*OutStreamer);
1564 
1565   // We want to emit the following pattern, which follows the x86 calling
1566   // convention to prepare for the trampoline call to be patched in.
1567   //
1568   //   .p2align 1, ...
1569   // .Lxray_event_sled_N:
1570   //   jmp +N                        // jump across the instrumentation sled
1571   //   ...                           // set up arguments in register
1572   //   callq __xray_CustomEvent@plt  // force dependency to symbol
1573   //   ...
1574   //   <jump here>
1575   //
1576   // After patching, it would look something like:
1577   //
1578   //   nopw (2-byte nop)
1579   //   ...
1580   //   callq __xrayCustomEvent  // already lowered
1581   //   ...
1582   //
1583   // ---
1584   // First we emit the label and the jump.
1585   auto CurSled = OutContext.createTempSymbol("xray_event_sled_", true);
1586   OutStreamer->AddComment("# XRay Custom Event Log");
1587   OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo());
1588   OutStreamer->emitLabel(CurSled);
1589 
1590   // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1591   // an operand (computed as an offset from the jmp instruction).
1592   // FIXME: Find another less hacky way do force the relative jump.
1593   OutStreamer->emitBinaryData("\xeb\x0f");
1594 
1595   // The default C calling convention will place two arguments into %rcx and
1596   // %rdx -- so we only work with those.
1597   const Register DestRegs[] = {X86::RDI, X86::RSI};
1598   bool UsedMask[] = {false, false};
1599   // Filled out in loop.
1600   Register SrcRegs[] = {0, 0};
1601 
1602   // Then we put the operands in the %rdi and %rsi registers. We spill the
1603   // values in the register before we clobber them, and mark them as used in
1604   // UsedMask. In case the arguments are already in the correct register, we use
1605   // emit nops appropriately sized to keep the sled the same size in every
1606   // situation.
1607   for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1608     if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
1609       assert(Op->isReg() && "Only support arguments in registers");
1610       SrcRegs[I] = getX86SubSuperRegister(Op->getReg(), 64);
1611       if (SrcRegs[I] != DestRegs[I]) {
1612         UsedMask[I] = true;
1613         EmitAndCountInstruction(
1614             MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
1615       } else {
1616         emitX86Nops(*OutStreamer, 4, Subtarget);
1617       }
1618     }
1619 
1620   // Now that the register values are stashed, mov arguments into place.
1621   // FIXME: This doesn't work if one of the later SrcRegs is equal to an
1622   // earlier DestReg. We will have already overwritten over the register before
1623   // we can copy from it.
1624   for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1625     if (SrcRegs[I] != DestRegs[I])
1626       EmitAndCountInstruction(
1627           MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));
1628 
1629   // We emit a hard dependency on the __xray_CustomEvent symbol, which is the
1630   // name of the trampoline to be implemented by the XRay runtime.
1631   auto TSym = OutContext.getOrCreateSymbol("__xray_CustomEvent");
1632   MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
1633   if (isPositionIndependent())
1634     TOp.setTargetFlags(X86II::MO_PLT);
1635 
1636   // Emit the call instruction.
1637   EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
1638                               .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));
1639 
1640   // Restore caller-saved and used registers.
1641   for (unsigned I = sizeof UsedMask; I-- > 0;)
1642     if (UsedMask[I])
1643       EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
1644     else
1645       emitX86Nops(*OutStreamer, 1, Subtarget);
1646 
1647   OutStreamer->AddComment("xray custom event end.");
1648 
1649   // Record the sled version. Version 0 of this sled was spelled differently, so
1650   // we let the runtime handle the different offsets we're using. Version 2
1651   // changed the absolute address to a PC-relative address.
1652   recordSled(CurSled, MI, SledKind::CUSTOM_EVENT, 2);
1653 }
1654 
1655 void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
1656                                                     X86MCInstLower &MCIL) {
1657   assert(Subtarget->is64Bit() && "XRay typed events only supports X86-64");
1658 
1659   NoAutoPaddingScope NoPadScope(*OutStreamer);
1660 
1661   // We want to emit the following pattern, which follows the x86 calling
1662   // convention to prepare for the trampoline call to be patched in.
1663   //
1664   //   .p2align 1, ...
1665   // .Lxray_event_sled_N:
1666   //   jmp +N                        // jump across the instrumentation sled
1667   //   ...                           // set up arguments in register
1668   //   callq __xray_TypedEvent@plt  // force dependency to symbol
1669   //   ...
1670   //   <jump here>
1671   //
1672   // After patching, it would look something like:
1673   //
1674   //   nopw (2-byte nop)
1675   //   ...
1676   //   callq __xrayTypedEvent  // already lowered
1677   //   ...
1678   //
1679   // ---
1680   // First we emit the label and the jump.
1681   auto CurSled = OutContext.createTempSymbol("xray_typed_event_sled_", true);
1682   OutStreamer->AddComment("# XRay Typed Event Log");
1683   OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo());
1684   OutStreamer->emitLabel(CurSled);
1685 
1686   // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1687   // an operand (computed as an offset from the jmp instruction).
1688   // FIXME: Find another less hacky way do force the relative jump.
1689   OutStreamer->emitBinaryData("\xeb\x14");
1690 
1691   // An x86-64 convention may place three arguments into %rcx, %rdx, and R8,
1692   // so we'll work with those. Or we may be called via SystemV, in which case
1693   // we don't have to do any translation.
1694   const Register DestRegs[] = {X86::RDI, X86::RSI, X86::RDX};
1695   bool UsedMask[] = {false, false, false};
1696 
1697   // Will fill out src regs in the loop.
1698   Register SrcRegs[] = {0, 0, 0};
1699 
1700   // Then we put the operands in the SystemV registers. We spill the values in
1701   // the registers before we clobber them, and mark them as used in UsedMask.
1702   // In case the arguments are already in the correct register, we emit nops
1703   // appropriately sized to keep the sled the same size in every situation.
1704   for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1705     if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
1706       // TODO: Is register only support adequate?
1707       assert(Op->isReg() && "Only supports arguments in registers");
1708       SrcRegs[I] = getX86SubSuperRegister(Op->getReg(), 64);
1709       if (SrcRegs[I] != DestRegs[I]) {
1710         UsedMask[I] = true;
1711         EmitAndCountInstruction(
1712             MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
1713       } else {
1714         emitX86Nops(*OutStreamer, 4, Subtarget);
1715       }
1716     }
1717 
1718   // In the above loop we only stash all of the destination registers or emit
1719   // nops if the arguments are already in the right place. Doing the actually
1720   // moving is postponed until after all the registers are stashed so nothing
1721   // is clobbers. We've already added nops to account for the size of mov and
1722   // push if the register is in the right place, so we only have to worry about
1723   // emitting movs.
1724   // FIXME: This doesn't work if one of the later SrcRegs is equal to an
1725   // earlier DestReg. We will have already overwritten over the register before
1726   // we can copy from it.
1727   for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1728     if (UsedMask[I])
1729       EmitAndCountInstruction(
1730           MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));
1731 
1732   // We emit a hard dependency on the __xray_TypedEvent symbol, which is the
1733   // name of the trampoline to be implemented by the XRay runtime.
1734   auto TSym = OutContext.getOrCreateSymbol("__xray_TypedEvent");
1735   MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
1736   if (isPositionIndependent())
1737     TOp.setTargetFlags(X86II::MO_PLT);
1738 
1739   // Emit the call instruction.
1740   EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
1741                               .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));
1742 
1743   // Restore caller-saved and used registers.
1744   for (unsigned I = sizeof UsedMask; I-- > 0;)
1745     if (UsedMask[I])
1746       EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
1747     else
1748       emitX86Nops(*OutStreamer, 1, Subtarget);
1749 
1750   OutStreamer->AddComment("xray typed event end.");
1751 
1752   // Record the sled version.
1753   recordSled(CurSled, MI, SledKind::TYPED_EVENT, 2);
1754 }
1755 
1756 void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
1757                                                   X86MCInstLower &MCIL) {
1758 
1759   NoAutoPaddingScope NoPadScope(*OutStreamer);
1760 
1761   const Function &F = MF->getFunction();
1762   if (F.hasFnAttribute("patchable-function-entry")) {
1763     unsigned Num;
1764     if (F.getFnAttribute("patchable-function-entry")
1765             .getValueAsString()
1766             .getAsInteger(10, Num))
1767       return;
1768     emitX86Nops(*OutStreamer, Num, Subtarget);
1769     return;
1770   }
1771   // We want to emit the following pattern:
1772   //
1773   //   .p2align 1, ...
1774   // .Lxray_sled_N:
1775   //   jmp .tmpN
1776   //   # 9 bytes worth of noops
1777   //
1778   // We need the 9 bytes because at runtime, we'd be patching over the full 11
1779   // bytes with the following pattern:
1780   //
1781   //   mov %r10, <function id, 32-bit>   // 6 bytes
1782   //   call <relative offset, 32-bits>   // 5 bytes
1783   //
1784   auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
1785   OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo());
1786   OutStreamer->emitLabel(CurSled);
1787 
1788   // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1789   // an operand (computed as an offset from the jmp instruction).
1790   // FIXME: Find another less hacky way do force the relative jump.
1791   OutStreamer->emitBytes("\xeb\x09");
1792   emitX86Nops(*OutStreamer, 9, Subtarget);
1793   recordSled(CurSled, MI, SledKind::FUNCTION_ENTER, 2);
1794 }
1795 
1796 void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
1797                                        X86MCInstLower &MCIL) {
1798   NoAutoPaddingScope NoPadScope(*OutStreamer);
1799 
1800   // Since PATCHABLE_RET takes the opcode of the return statement as an
1801   // argument, we use that to emit the correct form of the RET that we want.
1802   // i.e. when we see this:
1803   //
1804   //   PATCHABLE_RET X86::RET ...
1805   //
1806   // We should emit the RET followed by sleds.
1807   //
1808   //   .p2align 1, ...
1809   // .Lxray_sled_N:
1810   //   ret  # or equivalent instruction
1811   //   # 10 bytes worth of noops
1812   //
1813   // This just makes sure that the alignment for the next instruction is 2.
1814   auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
1815   OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo());
1816   OutStreamer->emitLabel(CurSled);
1817   unsigned OpCode = MI.getOperand(0).getImm();
1818   MCInst Ret;
1819   Ret.setOpcode(OpCode);
1820   for (auto &MO : drop_begin(MI.operands()))
1821     if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
1822       Ret.addOperand(*MaybeOperand);
1823   OutStreamer->emitInstruction(Ret, getSubtargetInfo());
1824   emitX86Nops(*OutStreamer, 10, Subtarget);
1825   recordSled(CurSled, MI, SledKind::FUNCTION_EXIT, 2);
1826 }
1827 
1828 void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
1829                                              X86MCInstLower &MCIL) {
1830   NoAutoPaddingScope NoPadScope(*OutStreamer);
1831 
1832   // Like PATCHABLE_RET, we have the actual instruction in the operands to this
1833   // instruction so we lower that particular instruction and its operands.
1834   // Unlike PATCHABLE_RET though, we put the sled before the JMP, much like how
1835   // we do it for PATCHABLE_FUNCTION_ENTER. The sled should be very similar to
1836   // the PATCHABLE_FUNCTION_ENTER case, followed by the lowering of the actual
1837   // tail call much like how we have it in PATCHABLE_RET.
1838   auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
1839   OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo());
1840   OutStreamer->emitLabel(CurSled);
1841   auto Target = OutContext.createTempSymbol();
1842 
1843   // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1844   // an operand (computed as an offset from the jmp instruction).
1845   // FIXME: Find another less hacky way do force the relative jump.
1846   OutStreamer->emitBytes("\xeb\x09");
1847   emitX86Nops(*OutStreamer, 9, Subtarget);
1848   OutStreamer->emitLabel(Target);
1849   recordSled(CurSled, MI, SledKind::TAIL_CALL, 2);
1850 
1851   unsigned OpCode = MI.getOperand(0).getImm();
1852   OpCode = convertTailJumpOpcode(OpCode);
1853   MCInst TC;
1854   TC.setOpcode(OpCode);
1855 
1856   // Before emitting the instruction, add a comment to indicate that this is
1857   // indeed a tail call.
1858   OutStreamer->AddComment("TAILCALL");
1859   for (auto &MO : drop_begin(MI.operands()))
1860     if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
1861       TC.addOperand(*MaybeOperand);
1862   OutStreamer->emitInstruction(TC, getSubtargetInfo());
1863 }
1864 
1865 // Returns instruction preceding MBBI in MachineFunction.
1866 // If MBBI is the first instruction of the first basic block, returns null.
1867 static MachineBasicBlock::const_iterator
1868 PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI) {
1869   const MachineBasicBlock *MBB = MBBI->getParent();
1870   while (MBBI == MBB->begin()) {
1871     if (MBB == &MBB->getParent()->front())
1872       return MachineBasicBlock::const_iterator();
1873     MBB = MBB->getPrevNode();
1874     MBBI = MBB->end();
1875   }
1876   --MBBI;
1877   return MBBI;
1878 }
1879 
1880 static const Constant *getConstantFromPool(const MachineInstr &MI,
1881                                            const MachineOperand &Op) {
1882   if (!Op.isCPI() || Op.getOffset() != 0)
1883     return nullptr;
1884 
1885   ArrayRef<MachineConstantPoolEntry> Constants =
1886       MI.getParent()->getParent()->getConstantPool()->getConstants();
1887   const MachineConstantPoolEntry &ConstantEntry = Constants[Op.getIndex()];
1888 
1889   // Bail if this is a machine constant pool entry, we won't be able to dig out
1890   // anything useful.
1891   if (ConstantEntry.isMachineConstantPoolEntry())
1892     return nullptr;
1893 
1894   return ConstantEntry.Val.ConstVal;
1895 }
1896 
1897 static std::string getShuffleComment(const MachineInstr *MI, unsigned SrcOp1Idx,
1898                                      unsigned SrcOp2Idx, ArrayRef<int> Mask) {
1899   std::string Comment;
1900 
1901   // Compute the name for a register. This is really goofy because we have
1902   // multiple instruction printers that could (in theory) use different
1903   // names. Fortunately most people use the ATT style (outside of Windows)
1904   // and they actually agree on register naming here. Ultimately, this is
1905   // a comment, and so its OK if it isn't perfect.
1906   auto GetRegisterName = [](MCRegister Reg) -> StringRef {
1907     return X86ATTInstPrinter::getRegisterName(Reg);
1908   };
1909 
1910   const MachineOperand &DstOp = MI->getOperand(0);
1911   const MachineOperand &SrcOp1 = MI->getOperand(SrcOp1Idx);
1912   const MachineOperand &SrcOp2 = MI->getOperand(SrcOp2Idx);
1913 
1914   StringRef DstName = DstOp.isReg() ? GetRegisterName(DstOp.getReg()) : "mem";
1915   StringRef Src1Name =
1916       SrcOp1.isReg() ? GetRegisterName(SrcOp1.getReg()) : "mem";
1917   StringRef Src2Name =
1918       SrcOp2.isReg() ? GetRegisterName(SrcOp2.getReg()) : "mem";
1919 
1920   // One source operand, fix the mask to print all elements in one span.
1921   SmallVector<int, 8> ShuffleMask(Mask);
1922   if (Src1Name == Src2Name)
1923     for (int i = 0, e = ShuffleMask.size(); i != e; ++i)
1924       if (ShuffleMask[i] >= e)
1925         ShuffleMask[i] -= e;
1926 
1927   raw_string_ostream CS(Comment);
1928   CS << DstName;
1929 
1930   // Handle AVX512 MASK/MASXZ write mask comments.
1931   // MASK: zmmX {%kY}
1932   // MASKZ: zmmX {%kY} {z}
1933   if (SrcOp1Idx > 1) {
1934     assert((SrcOp1Idx == 2 || SrcOp1Idx == 3) && "Unexpected writemask");
1935 
1936     const MachineOperand &WriteMaskOp = MI->getOperand(SrcOp1Idx - 1);
1937     if (WriteMaskOp.isReg()) {
1938       CS << " {%" << GetRegisterName(WriteMaskOp.getReg()) << "}";
1939 
1940       if (SrcOp1Idx == 2) {
1941         CS << " {z}";
1942       }
1943     }
1944   }
1945 
1946   CS << " = ";
1947 
1948   for (int i = 0, e = ShuffleMask.size(); i != e; ++i) {
1949     if (i != 0)
1950       CS << ",";
1951     if (ShuffleMask[i] == SM_SentinelZero) {
1952       CS << "zero";
1953       continue;
1954     }
1955 
1956     // Otherwise, it must come from src1 or src2.  Print the span of elements
1957     // that comes from this src.
1958     bool isSrc1 = ShuffleMask[i] < (int)e;
1959     CS << (isSrc1 ? Src1Name : Src2Name) << '[';
1960 
1961     bool IsFirst = true;
1962     while (i != e && ShuffleMask[i] != SM_SentinelZero &&
1963            (ShuffleMask[i] < (int)e) == isSrc1) {
1964       if (!IsFirst)
1965         CS << ',';
1966       else
1967         IsFirst = false;
1968       if (ShuffleMask[i] == SM_SentinelUndef)
1969         CS << "u";
1970       else
1971         CS << ShuffleMask[i] % (int)e;
1972       ++i;
1973     }
1974     CS << ']';
1975     --i; // For loop increments element #.
1976   }
1977   CS.flush();
1978 
1979   return Comment;
1980 }
1981 
1982 static void printConstant(const APInt &Val, raw_ostream &CS) {
1983   if (Val.getBitWidth() <= 64) {
1984     CS << Val.getZExtValue();
1985   } else {
1986     // print multi-word constant as (w0,w1)
1987     CS << "(";
1988     for (int i = 0, N = Val.getNumWords(); i < N; ++i) {
1989       if (i > 0)
1990         CS << ",";
1991       CS << Val.getRawData()[i];
1992     }
1993     CS << ")";
1994   }
1995 }
1996 
1997 static void printConstant(const APFloat &Flt, raw_ostream &CS) {
1998   SmallString<32> Str;
1999   // Force scientific notation to distinquish from integers.
2000   Flt.toString(Str, 0, 0);
2001   CS << Str;
2002 }
2003 
2004 static void printConstant(const Constant *COp, raw_ostream &CS) {
2005   if (isa<UndefValue>(COp)) {
2006     CS << "u";
2007   } else if (auto *CI = dyn_cast<ConstantInt>(COp)) {
2008     printConstant(CI->getValue(), CS);
2009   } else if (auto *CF = dyn_cast<ConstantFP>(COp)) {
2010     printConstant(CF->getValueAPF(), CS);
2011   } else {
2012     CS << "?";
2013   }
2014 }
2015 
2016 void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) {
2017   assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
2018   assert(getSubtarget().isOSWindows() && "SEH_ instruction Windows only");
2019 
2020   // Use the .cv_fpo directives if we're emitting CodeView on 32-bit x86.
2021   if (EmitFPOData) {
2022     X86TargetStreamer *XTS =
2023         static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer());
2024     switch (MI->getOpcode()) {
2025     case X86::SEH_PushReg:
2026       XTS->emitFPOPushReg(MI->getOperand(0).getImm());
2027       break;
2028     case X86::SEH_StackAlloc:
2029       XTS->emitFPOStackAlloc(MI->getOperand(0).getImm());
2030       break;
2031     case X86::SEH_StackAlign:
2032       XTS->emitFPOStackAlign(MI->getOperand(0).getImm());
2033       break;
2034     case X86::SEH_SetFrame:
2035       assert(MI->getOperand(1).getImm() == 0 &&
2036              ".cv_fpo_setframe takes no offset");
2037       XTS->emitFPOSetFrame(MI->getOperand(0).getImm());
2038       break;
2039     case X86::SEH_EndPrologue:
2040       XTS->emitFPOEndPrologue();
2041       break;
2042     case X86::SEH_SaveReg:
2043     case X86::SEH_SaveXMM:
2044     case X86::SEH_PushFrame:
2045       llvm_unreachable("SEH_ directive incompatible with FPO");
2046       break;
2047     default:
2048       llvm_unreachable("expected SEH_ instruction");
2049     }
2050     return;
2051   }
2052 
2053   // Otherwise, use the .seh_ directives for all other Windows platforms.
2054   switch (MI->getOpcode()) {
2055   case X86::SEH_PushReg:
2056     OutStreamer->emitWinCFIPushReg(MI->getOperand(0).getImm());
2057     break;
2058 
2059   case X86::SEH_SaveReg:
2060     OutStreamer->emitWinCFISaveReg(MI->getOperand(0).getImm(),
2061                                    MI->getOperand(1).getImm());
2062     break;
2063 
2064   case X86::SEH_SaveXMM:
2065     OutStreamer->emitWinCFISaveXMM(MI->getOperand(0).getImm(),
2066                                    MI->getOperand(1).getImm());
2067     break;
2068 
2069   case X86::SEH_StackAlloc:
2070     OutStreamer->emitWinCFIAllocStack(MI->getOperand(0).getImm());
2071     break;
2072 
2073   case X86::SEH_SetFrame:
2074     OutStreamer->emitWinCFISetFrame(MI->getOperand(0).getImm(),
2075                                     MI->getOperand(1).getImm());
2076     break;
2077 
2078   case X86::SEH_PushFrame:
2079     OutStreamer->emitWinCFIPushFrame(MI->getOperand(0).getImm());
2080     break;
2081 
2082   case X86::SEH_EndPrologue:
2083     OutStreamer->emitWinCFIEndProlog();
2084     break;
2085 
2086   default:
2087     llvm_unreachable("expected SEH_ instruction");
2088   }
2089 }
2090 
2091 static unsigned getRegisterWidth(const MCOperandInfo &Info) {
2092   if (Info.RegClass == X86::VR128RegClassID ||
2093       Info.RegClass == X86::VR128XRegClassID)
2094     return 128;
2095   if (Info.RegClass == X86::VR256RegClassID ||
2096       Info.RegClass == X86::VR256XRegClassID)
2097     return 256;
2098   if (Info.RegClass == X86::VR512RegClassID)
2099     return 512;
2100   llvm_unreachable("Unknown register class!");
2101 }
2102 
2103 static void addConstantComments(const MachineInstr *MI,
2104                                 MCStreamer &OutStreamer) {
2105   switch (MI->getOpcode()) {
2106   // Lower PSHUFB and VPERMILP normally but add a comment if we can find
2107   // a constant shuffle mask. We won't be able to do this at the MC layer
2108   // because the mask isn't an immediate.
2109   case X86::PSHUFBrm:
2110   case X86::VPSHUFBrm:
2111   case X86::VPSHUFBYrm:
2112   case X86::VPSHUFBZ128rm:
2113   case X86::VPSHUFBZ128rmk:
2114   case X86::VPSHUFBZ128rmkz:
2115   case X86::VPSHUFBZ256rm:
2116   case X86::VPSHUFBZ256rmk:
2117   case X86::VPSHUFBZ256rmkz:
2118   case X86::VPSHUFBZrm:
2119   case X86::VPSHUFBZrmk:
2120   case X86::VPSHUFBZrmkz: {
2121     unsigned SrcIdx = 1;
2122     if (X86II::isKMasked(MI->getDesc().TSFlags)) {
2123       // Skip mask operand.
2124       ++SrcIdx;
2125       if (X86II::isKMergeMasked(MI->getDesc().TSFlags)) {
2126         // Skip passthru operand.
2127         ++SrcIdx;
2128       }
2129     }
2130     unsigned MaskIdx = SrcIdx + 1 + X86::AddrDisp;
2131 
2132     assert(MI->getNumOperands() >= (SrcIdx + 1 + X86::AddrNumOperands) &&
2133            "Unexpected number of operands!");
2134 
2135     const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
2136     if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2137       unsigned Width = getRegisterWidth(MI->getDesc().operands()[0]);
2138       SmallVector<int, 64> Mask;
2139       DecodePSHUFBMask(C, Width, Mask);
2140       if (!Mask.empty())
2141         OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
2142     }
2143     break;
2144   }
2145 
2146   case X86::VPERMILPSrm:
2147   case X86::VPERMILPSYrm:
2148   case X86::VPERMILPSZ128rm:
2149   case X86::VPERMILPSZ128rmk:
2150   case X86::VPERMILPSZ128rmkz:
2151   case X86::VPERMILPSZ256rm:
2152   case X86::VPERMILPSZ256rmk:
2153   case X86::VPERMILPSZ256rmkz:
2154   case X86::VPERMILPSZrm:
2155   case X86::VPERMILPSZrmk:
2156   case X86::VPERMILPSZrmkz:
2157   case X86::VPERMILPDrm:
2158   case X86::VPERMILPDYrm:
2159   case X86::VPERMILPDZ128rm:
2160   case X86::VPERMILPDZ128rmk:
2161   case X86::VPERMILPDZ128rmkz:
2162   case X86::VPERMILPDZ256rm:
2163   case X86::VPERMILPDZ256rmk:
2164   case X86::VPERMILPDZ256rmkz:
2165   case X86::VPERMILPDZrm:
2166   case X86::VPERMILPDZrmk:
2167   case X86::VPERMILPDZrmkz: {
2168     unsigned ElSize;
2169     switch (MI->getOpcode()) {
2170     default: llvm_unreachable("Invalid opcode");
2171     case X86::VPERMILPSrm:
2172     case X86::VPERMILPSYrm:
2173     case X86::VPERMILPSZ128rm:
2174     case X86::VPERMILPSZ256rm:
2175     case X86::VPERMILPSZrm:
2176     case X86::VPERMILPSZ128rmkz:
2177     case X86::VPERMILPSZ256rmkz:
2178     case X86::VPERMILPSZrmkz:
2179     case X86::VPERMILPSZ128rmk:
2180     case X86::VPERMILPSZ256rmk:
2181     case X86::VPERMILPSZrmk:
2182       ElSize = 32;
2183       break;
2184     case X86::VPERMILPDrm:
2185     case X86::VPERMILPDYrm:
2186     case X86::VPERMILPDZ128rm:
2187     case X86::VPERMILPDZ256rm:
2188     case X86::VPERMILPDZrm:
2189     case X86::VPERMILPDZ128rmkz:
2190     case X86::VPERMILPDZ256rmkz:
2191     case X86::VPERMILPDZrmkz:
2192     case X86::VPERMILPDZ128rmk:
2193     case X86::VPERMILPDZ256rmk:
2194     case X86::VPERMILPDZrmk:
2195       ElSize = 64;
2196       break;
2197     }
2198 
2199     unsigned SrcIdx = 1;
2200     if (X86II::isKMasked(MI->getDesc().TSFlags)) {
2201       // Skip mask operand.
2202       ++SrcIdx;
2203       if (X86II::isKMergeMasked(MI->getDesc().TSFlags)) {
2204         // Skip passthru operand.
2205         ++SrcIdx;
2206       }
2207     }
2208     unsigned MaskIdx = SrcIdx + 1 + X86::AddrDisp;
2209 
2210     assert(MI->getNumOperands() >= (SrcIdx + 1 + X86::AddrNumOperands) &&
2211            "Unexpected number of operands!");
2212 
2213     const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
2214     if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2215       unsigned Width = getRegisterWidth(MI->getDesc().operands()[0]);
2216       SmallVector<int, 16> Mask;
2217       DecodeVPERMILPMask(C, ElSize, Width, Mask);
2218       if (!Mask.empty())
2219         OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
2220     }
2221     break;
2222   }
2223 
2224   case X86::VPERMIL2PDrm:
2225   case X86::VPERMIL2PSrm:
2226   case X86::VPERMIL2PDYrm:
2227   case X86::VPERMIL2PSYrm: {
2228     assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands + 1) &&
2229            "Unexpected number of operands!");
2230 
2231     const MachineOperand &CtrlOp = MI->getOperand(MI->getNumOperands() - 1);
2232     if (!CtrlOp.isImm())
2233       break;
2234 
2235     unsigned ElSize;
2236     switch (MI->getOpcode()) {
2237     default: llvm_unreachable("Invalid opcode");
2238     case X86::VPERMIL2PSrm: case X86::VPERMIL2PSYrm: ElSize = 32; break;
2239     case X86::VPERMIL2PDrm: case X86::VPERMIL2PDYrm: ElSize = 64; break;
2240     }
2241 
2242     const MachineOperand &MaskOp = MI->getOperand(3 + X86::AddrDisp);
2243     if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2244       unsigned Width = getRegisterWidth(MI->getDesc().operands()[0]);
2245       SmallVector<int, 16> Mask;
2246       DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Width, Mask);
2247       if (!Mask.empty())
2248         OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask));
2249     }
2250     break;
2251   }
2252 
2253   case X86::VPPERMrrm: {
2254     assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands) &&
2255            "Unexpected number of operands!");
2256 
2257     const MachineOperand &MaskOp = MI->getOperand(3 + X86::AddrDisp);
2258     if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2259       unsigned Width = getRegisterWidth(MI->getDesc().operands()[0]);
2260       SmallVector<int, 16> Mask;
2261       DecodeVPPERMMask(C, Width, Mask);
2262       if (!Mask.empty())
2263         OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask));
2264     }
2265     break;
2266   }
2267 
2268   case X86::MMX_MOVQ64rm: {
2269     assert(MI->getNumOperands() == (1 + X86::AddrNumOperands) &&
2270            "Unexpected number of operands!");
2271     if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
2272       std::string Comment;
2273       raw_string_ostream CS(Comment);
2274       const MachineOperand &DstOp = MI->getOperand(0);
2275       CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
2276       if (auto *CF = dyn_cast<ConstantFP>(C)) {
2277         CS << "0x" << toString(CF->getValueAPF().bitcastToAPInt(), 16, false);
2278         OutStreamer.AddComment(CS.str());
2279       }
2280     }
2281     break;
2282   }
2283 
2284 #define MOV_CASE(Prefix, Suffix)                                               \
2285   case X86::Prefix##MOVAPD##Suffix##rm:                                        \
2286   case X86::Prefix##MOVAPS##Suffix##rm:                                        \
2287   case X86::Prefix##MOVUPD##Suffix##rm:                                        \
2288   case X86::Prefix##MOVUPS##Suffix##rm:                                        \
2289   case X86::Prefix##MOVDQA##Suffix##rm:                                        \
2290   case X86::Prefix##MOVDQU##Suffix##rm:
2291 
2292 #define MOV_AVX512_CASE(Suffix)                                                \
2293   case X86::VMOVDQA64##Suffix##rm:                                             \
2294   case X86::VMOVDQA32##Suffix##rm:                                             \
2295   case X86::VMOVDQU64##Suffix##rm:                                             \
2296   case X86::VMOVDQU32##Suffix##rm:                                             \
2297   case X86::VMOVDQU16##Suffix##rm:                                             \
2298   case X86::VMOVDQU8##Suffix##rm:                                              \
2299   case X86::VMOVAPS##Suffix##rm:                                               \
2300   case X86::VMOVAPD##Suffix##rm:                                               \
2301   case X86::VMOVUPS##Suffix##rm:                                               \
2302   case X86::VMOVUPD##Suffix##rm:
2303 
2304 #define CASE_ALL_MOV_RM()                                                      \
2305   MOV_CASE(, )   /* SSE */                                                     \
2306   MOV_CASE(V, )  /* AVX-128 */                                                 \
2307   MOV_CASE(V, Y) /* AVX-256 */                                                 \
2308   MOV_AVX512_CASE(Z)                                                           \
2309   MOV_AVX512_CASE(Z256)                                                        \
2310   MOV_AVX512_CASE(Z128)
2311 
2312     // For loads from a constant pool to a vector register, print the constant
2313     // loaded.
2314     CASE_ALL_MOV_RM()
2315   case X86::VBROADCASTF128:
2316   case X86::VBROADCASTI128:
2317   case X86::VBROADCASTF32X4Z256rm:
2318   case X86::VBROADCASTF32X4rm:
2319   case X86::VBROADCASTF32X8rm:
2320   case X86::VBROADCASTF64X2Z128rm:
2321   case X86::VBROADCASTF64X2rm:
2322   case X86::VBROADCASTF64X4rm:
2323   case X86::VBROADCASTI32X4Z256rm:
2324   case X86::VBROADCASTI32X4rm:
2325   case X86::VBROADCASTI32X8rm:
2326   case X86::VBROADCASTI64X2Z128rm:
2327   case X86::VBROADCASTI64X2rm:
2328   case X86::VBROADCASTI64X4rm:
2329     assert(MI->getNumOperands() >= (1 + X86::AddrNumOperands) &&
2330            "Unexpected number of operands!");
2331     if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
2332       int NumLanes = 1;
2333       // Override NumLanes for the broadcast instructions.
2334       switch (MI->getOpcode()) {
2335       case X86::VBROADCASTF128:        NumLanes = 2; break;
2336       case X86::VBROADCASTI128:        NumLanes = 2; break;
2337       case X86::VBROADCASTF32X4Z256rm: NumLanes = 2; break;
2338       case X86::VBROADCASTF32X4rm:     NumLanes = 4; break;
2339       case X86::VBROADCASTF32X8rm:     NumLanes = 2; break;
2340       case X86::VBROADCASTF64X2Z128rm: NumLanes = 2; break;
2341       case X86::VBROADCASTF64X2rm:     NumLanes = 4; break;
2342       case X86::VBROADCASTF64X4rm:     NumLanes = 2; break;
2343       case X86::VBROADCASTI32X4Z256rm: NumLanes = 2; break;
2344       case X86::VBROADCASTI32X4rm:     NumLanes = 4; break;
2345       case X86::VBROADCASTI32X8rm:     NumLanes = 2; break;
2346       case X86::VBROADCASTI64X2Z128rm: NumLanes = 2; break;
2347       case X86::VBROADCASTI64X2rm:     NumLanes = 4; break;
2348       case X86::VBROADCASTI64X4rm:     NumLanes = 2; break;
2349       }
2350 
2351       std::string Comment;
2352       raw_string_ostream CS(Comment);
2353       const MachineOperand &DstOp = MI->getOperand(0);
2354       CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
2355       if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
2356         CS << "[";
2357         for (int l = 0; l != NumLanes; ++l) {
2358           for (int i = 0, NumElements = CDS->getNumElements(); i < NumElements;
2359                ++i) {
2360             if (i != 0 || l != 0)
2361               CS << ",";
2362             if (CDS->getElementType()->isIntegerTy())
2363               printConstant(CDS->getElementAsAPInt(i), CS);
2364             else if (CDS->getElementType()->isHalfTy() ||
2365                      CDS->getElementType()->isFloatTy() ||
2366                      CDS->getElementType()->isDoubleTy())
2367               printConstant(CDS->getElementAsAPFloat(i), CS);
2368             else
2369               CS << "?";
2370           }
2371         }
2372         CS << "]";
2373         OutStreamer.AddComment(CS.str());
2374       } else if (auto *CV = dyn_cast<ConstantVector>(C)) {
2375         CS << "<";
2376         for (int l = 0; l != NumLanes; ++l) {
2377           for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands;
2378                ++i) {
2379             if (i != 0 || l != 0)
2380               CS << ",";
2381             printConstant(CV->getOperand(i), CS);
2382           }
2383         }
2384         CS << ">";
2385         OutStreamer.AddComment(CS.str());
2386       }
2387     }
2388     break;
2389 
2390   case X86::MOVDDUPrm:
2391   case X86::VMOVDDUPrm:
2392   case X86::VMOVDDUPZ128rm:
2393   case X86::VBROADCASTSSrm:
2394   case X86::VBROADCASTSSYrm:
2395   case X86::VBROADCASTSSZ128rm:
2396   case X86::VBROADCASTSSZ256rm:
2397   case X86::VBROADCASTSSZrm:
2398   case X86::VBROADCASTSDYrm:
2399   case X86::VBROADCASTSDZ256rm:
2400   case X86::VBROADCASTSDZrm:
2401   case X86::VPBROADCASTBrm:
2402   case X86::VPBROADCASTBYrm:
2403   case X86::VPBROADCASTBZ128rm:
2404   case X86::VPBROADCASTBZ256rm:
2405   case X86::VPBROADCASTBZrm:
2406   case X86::VPBROADCASTDrm:
2407   case X86::VPBROADCASTDYrm:
2408   case X86::VPBROADCASTDZ128rm:
2409   case X86::VPBROADCASTDZ256rm:
2410   case X86::VPBROADCASTDZrm:
2411   case X86::VPBROADCASTQrm:
2412   case X86::VPBROADCASTQYrm:
2413   case X86::VPBROADCASTQZ128rm:
2414   case X86::VPBROADCASTQZ256rm:
2415   case X86::VPBROADCASTQZrm:
2416   case X86::VPBROADCASTWrm:
2417   case X86::VPBROADCASTWYrm:
2418   case X86::VPBROADCASTWZ128rm:
2419   case X86::VPBROADCASTWZ256rm:
2420   case X86::VPBROADCASTWZrm:
2421     assert(MI->getNumOperands() >= (1 + X86::AddrNumOperands) &&
2422            "Unexpected number of operands!");
2423     if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
2424       int NumElts;
2425       switch (MI->getOpcode()) {
2426       default: llvm_unreachable("Invalid opcode");
2427       case X86::MOVDDUPrm:          NumElts = 2;  break;
2428       case X86::VMOVDDUPrm:         NumElts = 2;  break;
2429       case X86::VMOVDDUPZ128rm:     NumElts = 2;  break;
2430       case X86::VBROADCASTSSrm:     NumElts = 4;  break;
2431       case X86::VBROADCASTSSYrm:    NumElts = 8;  break;
2432       case X86::VBROADCASTSSZ128rm: NumElts = 4;  break;
2433       case X86::VBROADCASTSSZ256rm: NumElts = 8;  break;
2434       case X86::VBROADCASTSSZrm:    NumElts = 16; break;
2435       case X86::VBROADCASTSDYrm:    NumElts = 4;  break;
2436       case X86::VBROADCASTSDZ256rm: NumElts = 4;  break;
2437       case X86::VBROADCASTSDZrm:    NumElts = 8;  break;
2438       case X86::VPBROADCASTBrm:     NumElts = 16; break;
2439       case X86::VPBROADCASTBYrm:    NumElts = 32; break;
2440       case X86::VPBROADCASTBZ128rm: NumElts = 16; break;
2441       case X86::VPBROADCASTBZ256rm: NumElts = 32; break;
2442       case X86::VPBROADCASTBZrm:    NumElts = 64; break;
2443       case X86::VPBROADCASTDrm:     NumElts = 4;  break;
2444       case X86::VPBROADCASTDYrm:    NumElts = 8;  break;
2445       case X86::VPBROADCASTDZ128rm: NumElts = 4;  break;
2446       case X86::VPBROADCASTDZ256rm: NumElts = 8;  break;
2447       case X86::VPBROADCASTDZrm:    NumElts = 16; break;
2448       case X86::VPBROADCASTQrm:     NumElts = 2;  break;
2449       case X86::VPBROADCASTQYrm:    NumElts = 4;  break;
2450       case X86::VPBROADCASTQZ128rm: NumElts = 2;  break;
2451       case X86::VPBROADCASTQZ256rm: NumElts = 4;  break;
2452       case X86::VPBROADCASTQZrm:    NumElts = 8;  break;
2453       case X86::VPBROADCASTWrm:     NumElts = 8;  break;
2454       case X86::VPBROADCASTWYrm:    NumElts = 16; break;
2455       case X86::VPBROADCASTWZ128rm: NumElts = 8;  break;
2456       case X86::VPBROADCASTWZ256rm: NumElts = 16; break;
2457       case X86::VPBROADCASTWZrm:    NumElts = 32; break;
2458       }
2459 
2460       std::string Comment;
2461       raw_string_ostream CS(Comment);
2462       const MachineOperand &DstOp = MI->getOperand(0);
2463       CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
2464       CS << "[";
2465       for (int i = 0; i != NumElts; ++i) {
2466         if (i != 0)
2467           CS << ",";
2468         printConstant(C, CS);
2469       }
2470       CS << "]";
2471       OutStreamer.AddComment(CS.str());
2472     }
2473   }
2474 }
2475 
2476 void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
2477   // FIXME: Enable feature predicate checks once all the test pass.
2478   // X86_MC::verifyInstructionPredicates(MI->getOpcode(),
2479   //                                     Subtarget->getFeatureBits());
2480 
2481   X86MCInstLower MCInstLowering(*MF, *this);
2482   const X86RegisterInfo *RI =
2483       MF->getSubtarget<X86Subtarget>().getRegisterInfo();
2484 
2485   if (MI->getOpcode() == X86::OR64rm) {
2486     for (auto &Opd : MI->operands()) {
2487       if (Opd.isSymbol() && StringRef(Opd.getSymbolName()) ==
2488                                 "swift_async_extendedFramePointerFlags") {
2489         ShouldEmitWeakSwiftAsyncExtendedFramePointerFlags = true;
2490       }
2491     }
2492   }
2493 
2494   // Add a comment about EVEX-2-VEX compression for AVX-512 instrs that
2495   // are compressed from EVEX encoding to VEX encoding.
2496   if (TM.Options.MCOptions.ShowMCEncoding) {
2497     if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX)
2498       OutStreamer->AddComment("EVEX TO VEX Compression ", false);
2499   }
2500 
2501   // Add comments for values loaded from constant pool.
2502   if (OutStreamer->isVerboseAsm())
2503     addConstantComments(MI, *OutStreamer);
2504 
2505   switch (MI->getOpcode()) {
2506   case TargetOpcode::DBG_VALUE:
2507     llvm_unreachable("Should be handled target independently");
2508 
2509   case X86::EH_RETURN:
2510   case X86::EH_RETURN64: {
2511     // Lower these as normal, but add some comments.
2512     Register Reg = MI->getOperand(0).getReg();
2513     OutStreamer->AddComment(StringRef("eh_return, addr: %") +
2514                             X86ATTInstPrinter::getRegisterName(Reg));
2515     break;
2516   }
2517   case X86::CLEANUPRET: {
2518     // Lower these as normal, but add some comments.
2519     OutStreamer->AddComment("CLEANUPRET");
2520     break;
2521   }
2522 
2523   case X86::CATCHRET: {
2524     // Lower these as normal, but add some comments.
2525     OutStreamer->AddComment("CATCHRET");
2526     break;
2527   }
2528 
2529   case X86::ENDBR32:
2530   case X86::ENDBR64: {
2531     // CurrentPatchableFunctionEntrySym can be CurrentFnBegin only for
2532     // -fpatchable-function-entry=N,0. The entry MBB is guaranteed to be
2533     // non-empty. If MI is the initial ENDBR, place the
2534     // __patchable_function_entries label after ENDBR.
2535     if (CurrentPatchableFunctionEntrySym &&
2536         CurrentPatchableFunctionEntrySym == CurrentFnBegin &&
2537         MI == &MF->front().front()) {
2538       MCInst Inst;
2539       MCInstLowering.Lower(MI, Inst);
2540       EmitAndCountInstruction(Inst);
2541       CurrentPatchableFunctionEntrySym = createTempSymbol("patch");
2542       OutStreamer->emitLabel(CurrentPatchableFunctionEntrySym);
2543       return;
2544     }
2545     break;
2546   }
2547 
2548   case X86::TAILJMPd64:
2549     if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(X86::R11))
2550       EmitAndCountInstruction(MCInstBuilder(X86::CS_PREFIX));
2551     [[fallthrough]];
2552   case X86::TAILJMPr:
2553   case X86::TAILJMPm:
2554   case X86::TAILJMPd:
2555   case X86::TAILJMPd_CC:
2556   case X86::TAILJMPr64:
2557   case X86::TAILJMPm64:
2558   case X86::TAILJMPd64_CC:
2559   case X86::TAILJMPr64_REX:
2560   case X86::TAILJMPm64_REX:
2561     // Lower these as normal, but add some comments.
2562     OutStreamer->AddComment("TAILCALL");
2563     break;
2564 
2565   case X86::TLS_addr32:
2566   case X86::TLS_addr64:
2567   case X86::TLS_addrX32:
2568   case X86::TLS_base_addr32:
2569   case X86::TLS_base_addr64:
2570   case X86::TLS_base_addrX32:
2571     return LowerTlsAddr(MCInstLowering, *MI);
2572 
2573   case X86::MOVPC32r: {
2574     // This is a pseudo op for a two instruction sequence with a label, which
2575     // looks like:
2576     //     call "L1$pb"
2577     // "L1$pb":
2578     //     popl %esi
2579 
2580     // Emit the call.
2581     MCSymbol *PICBase = MF->getPICBaseSymbol();
2582     // FIXME: We would like an efficient form for this, so we don't have to do a
2583     // lot of extra uniquing.
2584     EmitAndCountInstruction(
2585         MCInstBuilder(X86::CALLpcrel32)
2586             .addExpr(MCSymbolRefExpr::create(PICBase, OutContext)));
2587 
2588     const X86FrameLowering *FrameLowering =
2589         MF->getSubtarget<X86Subtarget>().getFrameLowering();
2590     bool hasFP = FrameLowering->hasFP(*MF);
2591 
2592     // TODO: This is needed only if we require precise CFA.
2593     bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() &&
2594                                !OutStreamer->getDwarfFrameInfos().back().End;
2595 
2596     int stackGrowth = -RI->getSlotSize();
2597 
2598     if (HasActiveDwarfFrame && !hasFP) {
2599       OutStreamer->emitCFIAdjustCfaOffset(-stackGrowth);
2600     }
2601 
2602     // Emit the label.
2603     OutStreamer->emitLabel(PICBase);
2604 
2605     // popl $reg
2606     EmitAndCountInstruction(
2607         MCInstBuilder(X86::POP32r).addReg(MI->getOperand(0).getReg()));
2608 
2609     if (HasActiveDwarfFrame && !hasFP) {
2610       OutStreamer->emitCFIAdjustCfaOffset(stackGrowth);
2611     }
2612     return;
2613   }
2614 
2615   case X86::ADD32ri: {
2616     // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri.
2617     if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS)
2618       break;
2619 
2620     // Okay, we have something like:
2621     //  EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL)
2622 
2623     // For this, we want to print something like:
2624     //   MYGLOBAL + (. - PICBASE)
2625     // However, we can't generate a ".", so just emit a new label here and refer
2626     // to it.
2627     MCSymbol *DotSym = OutContext.createTempSymbol();
2628     OutStreamer->emitLabel(DotSym);
2629 
2630     // Now that we have emitted the label, lower the complex operand expression.
2631     MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2));
2632 
2633     const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext);
2634     const MCExpr *PICBase =
2635         MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext);
2636     DotExpr = MCBinaryExpr::createSub(DotExpr, PICBase, OutContext);
2637 
2638     DotExpr = MCBinaryExpr::createAdd(
2639         MCSymbolRefExpr::create(OpSym, OutContext), DotExpr, OutContext);
2640 
2641     EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri)
2642                                 .addReg(MI->getOperand(0).getReg())
2643                                 .addReg(MI->getOperand(1).getReg())
2644                                 .addExpr(DotExpr));
2645     return;
2646   }
2647   case TargetOpcode::STATEPOINT:
2648     return LowerSTATEPOINT(*MI, MCInstLowering);
2649 
2650   case TargetOpcode::FAULTING_OP:
2651     return LowerFAULTING_OP(*MI, MCInstLowering);
2652 
2653   case TargetOpcode::FENTRY_CALL:
2654     return LowerFENTRY_CALL(*MI, MCInstLowering);
2655 
2656   case TargetOpcode::PATCHABLE_OP:
2657     return LowerPATCHABLE_OP(*MI, MCInstLowering);
2658 
2659   case TargetOpcode::STACKMAP:
2660     return LowerSTACKMAP(*MI);
2661 
2662   case TargetOpcode::PATCHPOINT:
2663     return LowerPATCHPOINT(*MI, MCInstLowering);
2664 
2665   case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
2666     return LowerPATCHABLE_FUNCTION_ENTER(*MI, MCInstLowering);
2667 
2668   case TargetOpcode::PATCHABLE_RET:
2669     return LowerPATCHABLE_RET(*MI, MCInstLowering);
2670 
2671   case TargetOpcode::PATCHABLE_TAIL_CALL:
2672     return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering);
2673 
2674   case TargetOpcode::PATCHABLE_EVENT_CALL:
2675     return LowerPATCHABLE_EVENT_CALL(*MI, MCInstLowering);
2676 
2677   case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
2678     return LowerPATCHABLE_TYPED_EVENT_CALL(*MI, MCInstLowering);
2679 
2680   case X86::MORESTACK_RET:
2681     EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
2682     return;
2683 
2684   case X86::KCFI_CHECK:
2685     return LowerKCFI_CHECK(*MI);
2686 
2687   case X86::ASAN_CHECK_MEMACCESS:
2688     return LowerASAN_CHECK_MEMACCESS(*MI);
2689 
2690   case X86::MORESTACK_RET_RESTORE_R10:
2691     // Return, then restore R10.
2692     EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
2693     EmitAndCountInstruction(
2694         MCInstBuilder(X86::MOV64rr).addReg(X86::R10).addReg(X86::RAX));
2695     return;
2696 
2697   case X86::SEH_PushReg:
2698   case X86::SEH_SaveReg:
2699   case X86::SEH_SaveXMM:
2700   case X86::SEH_StackAlloc:
2701   case X86::SEH_StackAlign:
2702   case X86::SEH_SetFrame:
2703   case X86::SEH_PushFrame:
2704   case X86::SEH_EndPrologue:
2705     EmitSEHInstruction(MI);
2706     return;
2707 
2708   case X86::SEH_Epilogue: {
2709     assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
2710     MachineBasicBlock::const_iterator MBBI(MI);
2711     // Check if preceded by a call and emit nop if so.
2712     for (MBBI = PrevCrossBBInst(MBBI);
2713          MBBI != MachineBasicBlock::const_iterator();
2714          MBBI = PrevCrossBBInst(MBBI)) {
2715       // Conservatively assume that pseudo instructions don't emit code and keep
2716       // looking for a call. We may emit an unnecessary nop in some cases.
2717       if (!MBBI->isPseudo()) {
2718         if (MBBI->isCall())
2719           EmitAndCountInstruction(MCInstBuilder(X86::NOOP));
2720         break;
2721       }
2722     }
2723     return;
2724   }
2725   case X86::UBSAN_UD1:
2726     EmitAndCountInstruction(MCInstBuilder(X86::UD1Lm)
2727                                 .addReg(X86::EAX)
2728                                 .addReg(X86::EAX)
2729                                 .addImm(1)
2730                                 .addReg(X86::NoRegister)
2731                                 .addImm(MI->getOperand(0).getImm())
2732                                 .addReg(X86::NoRegister));
2733     return;
2734   case X86::CALL64pcrel32:
2735     if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(X86::R11))
2736       EmitAndCountInstruction(MCInstBuilder(X86::CS_PREFIX));
2737     break;
2738   }
2739 
2740   MCInst TmpInst;
2741   MCInstLowering.Lower(MI, TmpInst);
2742 
2743   // Stackmap shadows cannot include branch targets, so we can count the bytes
2744   // in a call towards the shadow, but must ensure that the no thread returns
2745   // in to the stackmap shadow.  The only way to achieve this is if the call
2746   // is at the end of the shadow.
2747   if (MI->isCall()) {
2748     // Count then size of the call towards the shadow
2749     SMShadowTracker.count(TmpInst, getSubtargetInfo(), CodeEmitter.get());
2750     // Then flush the shadow so that we fill with nops before the call, not
2751     // after it.
2752     SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
2753     // Then emit the call
2754     OutStreamer->emitInstruction(TmpInst, getSubtargetInfo());
2755     return;
2756   }
2757 
2758   EmitAndCountInstruction(TmpInst);
2759 }
2760