1 //===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains code to lower X86 MachineInstrs to their corresponding
10 // MCInst records.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "MCTargetDesc/X86ATTInstPrinter.h"
15 #include "MCTargetDesc/X86BaseInfo.h"
16 #include "MCTargetDesc/X86InstComments.h"
17 #include "MCTargetDesc/X86ShuffleDecode.h"
18 #include "MCTargetDesc/X86TargetStreamer.h"
19 #include "X86AsmPrinter.h"
20 #include "X86RegisterInfo.h"
21 #include "X86ShuffleDecodeConstantPool.h"
22 #include "X86Subtarget.h"
23 #include "llvm/ADT/Optional.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/iterator_range.h"
26 #include "llvm/CodeGen/MachineConstantPool.h"
27 #include "llvm/CodeGen/MachineFunction.h"
28 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
29 #include "llvm/CodeGen/MachineOperand.h"
30 #include "llvm/CodeGen/StackMaps.h"
31 #include "llvm/IR/DataLayout.h"
32 #include "llvm/IR/GlobalValue.h"
33 #include "llvm/IR/Mangler.h"
34 #include "llvm/MC/MCAsmInfo.h"
35 #include "llvm/MC/MCCodeEmitter.h"
36 #include "llvm/MC/MCContext.h"
37 #include "llvm/MC/MCExpr.h"
38 #include "llvm/MC/MCFixup.h"
39 #include "llvm/MC/MCInst.h"
40 #include "llvm/MC/MCInstBuilder.h"
41 #include "llvm/MC/MCSection.h"
42 #include "llvm/MC/MCSectionELF.h"
43 #include "llvm/MC/MCStreamer.h"
44 #include "llvm/MC/MCSymbol.h"
45 #include "llvm/MC/MCSymbolELF.h"
46 #include "llvm/MC/TargetRegistry.h"
47 #include "llvm/Target/TargetLoweringObjectFile.h"
48 #include "llvm/Target/TargetMachine.h"
49 #include "llvm/Transforms/Instrumentation/AddressSanitizer.h"
50 #include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
51 #include <string>
52 
53 using namespace llvm;
54 
55 namespace {
56 
57 /// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst.
58 class X86MCInstLower {
59   MCContext &Ctx;
60   const MachineFunction &MF;
61   const TargetMachine &TM;
62   const MCAsmInfo &MAI;
63   X86AsmPrinter &AsmPrinter;
64 
65 public:
66   X86MCInstLower(const MachineFunction &MF, X86AsmPrinter &asmprinter);
67 
68   Optional<MCOperand> LowerMachineOperand(const MachineInstr *MI,
69                                           const MachineOperand &MO) const;
70   void Lower(const MachineInstr *MI, MCInst &OutMI) const;
71 
72   MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const;
73   MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
74 
75 private:
76   MachineModuleInfoMachO &getMachOMMI() const;
77 };
78 
79 } // end anonymous namespace
80 
81 /// A RAII helper which defines a region of instructions which can't have
82 /// padding added between them for correctness.
83 struct NoAutoPaddingScope {
84   MCStreamer &OS;
85   const bool OldAllowAutoPadding;
86   NoAutoPaddingScope(MCStreamer &OS)
87       : OS(OS), OldAllowAutoPadding(OS.getAllowAutoPadding()) {
88     changeAndComment(false);
89   }
90   ~NoAutoPaddingScope() { changeAndComment(OldAllowAutoPadding); }
91   void changeAndComment(bool b) {
92     if (b == OS.getAllowAutoPadding())
93       return;
94     OS.setAllowAutoPadding(b);
95     if (b)
96       OS.emitRawComment("autopadding");
97     else
98       OS.emitRawComment("noautopadding");
99   }
100 };
101 
102 // Emit a minimal sequence of nops spanning NumBytes bytes.
103 static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
104                         const X86Subtarget *Subtarget);
105 
106 void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst,
107                                                  const MCSubtargetInfo &STI,
108                                                  MCCodeEmitter *CodeEmitter) {
109   if (InShadow) {
110     SmallString<256> Code;
111     SmallVector<MCFixup, 4> Fixups;
112     raw_svector_ostream VecOS(Code);
113     CodeEmitter->encodeInstruction(Inst, VecOS, Fixups, STI);
114     CurrentShadowSize += Code.size();
115     if (CurrentShadowSize >= RequiredShadowSize)
116       InShadow = false; // The shadow is big enough. Stop counting.
117   }
118 }
119 
120 void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding(
121     MCStreamer &OutStreamer, const MCSubtargetInfo &STI) {
122   if (InShadow && CurrentShadowSize < RequiredShadowSize) {
123     InShadow = false;
124     emitX86Nops(OutStreamer, RequiredShadowSize - CurrentShadowSize,
125                 &MF->getSubtarget<X86Subtarget>());
126   }
127 }
128 
129 void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) {
130   OutStreamer->emitInstruction(Inst, getSubtargetInfo());
131   SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get());
132 }
133 
134 X86MCInstLower::X86MCInstLower(const MachineFunction &mf,
135                                X86AsmPrinter &asmprinter)
136     : Ctx(mf.getContext()), MF(mf), TM(mf.getTarget()), MAI(*TM.getMCAsmInfo()),
137       AsmPrinter(asmprinter) {}
138 
139 MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
140   return MF.getMMI().getObjFileInfo<MachineModuleInfoMachO>();
141 }
142 
143 /// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol
144 /// operand to an MCSymbol.
145 MCSymbol *X86MCInstLower::GetSymbolFromOperand(const MachineOperand &MO) const {
146   const Triple &TT = TM.getTargetTriple();
147   if (MO.isGlobal() && TT.isOSBinFormatELF())
148     return AsmPrinter.getSymbolPreferLocal(*MO.getGlobal());
149 
150   const DataLayout &DL = MF.getDataLayout();
151   assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) &&
152          "Isn't a symbol reference");
153 
154   MCSymbol *Sym = nullptr;
155   SmallString<128> Name;
156   StringRef Suffix;
157 
158   switch (MO.getTargetFlags()) {
159   case X86II::MO_DLLIMPORT:
160     // Handle dllimport linkage.
161     Name += "__imp_";
162     break;
163   case X86II::MO_COFFSTUB:
164     Name += ".refptr.";
165     break;
166   case X86II::MO_DARWIN_NONLAZY:
167   case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
168     Suffix = "$non_lazy_ptr";
169     break;
170   }
171 
172   if (!Suffix.empty())
173     Name += DL.getPrivateGlobalPrefix();
174 
175   if (MO.isGlobal()) {
176     const GlobalValue *GV = MO.getGlobal();
177     AsmPrinter.getNameWithPrefix(Name, GV);
178   } else if (MO.isSymbol()) {
179     Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL);
180   } else if (MO.isMBB()) {
181     assert(Suffix.empty());
182     Sym = MO.getMBB()->getSymbol();
183   }
184 
185   Name += Suffix;
186   if (!Sym)
187     Sym = Ctx.getOrCreateSymbol(Name);
188 
189   // If the target flags on the operand changes the name of the symbol, do that
190   // before we return the symbol.
191   switch (MO.getTargetFlags()) {
192   default:
193     break;
194   case X86II::MO_COFFSTUB: {
195     MachineModuleInfoCOFF &MMICOFF =
196         MF.getMMI().getObjFileInfo<MachineModuleInfoCOFF>();
197     MachineModuleInfoImpl::StubValueTy &StubSym = MMICOFF.getGVStubEntry(Sym);
198     if (!StubSym.getPointer()) {
199       assert(MO.isGlobal() && "Extern symbol not handled yet");
200       StubSym = MachineModuleInfoImpl::StubValueTy(
201           AsmPrinter.getSymbol(MO.getGlobal()), true);
202     }
203     break;
204   }
205   case X86II::MO_DARWIN_NONLAZY:
206   case X86II::MO_DARWIN_NONLAZY_PIC_BASE: {
207     MachineModuleInfoImpl::StubValueTy &StubSym =
208         getMachOMMI().getGVStubEntry(Sym);
209     if (!StubSym.getPointer()) {
210       assert(MO.isGlobal() && "Extern symbol not handled yet");
211       StubSym = MachineModuleInfoImpl::StubValueTy(
212           AsmPrinter.getSymbol(MO.getGlobal()),
213           !MO.getGlobal()->hasInternalLinkage());
214     }
215     break;
216   }
217   }
218 
219   return Sym;
220 }
221 
222 MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
223                                              MCSymbol *Sym) const {
224   // FIXME: We would like an efficient form for this, so we don't have to do a
225   // lot of extra uniquing.
226   const MCExpr *Expr = nullptr;
227   MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
228 
229   switch (MO.getTargetFlags()) {
230   default:
231     llvm_unreachable("Unknown target flag on GV operand");
232   case X86II::MO_NO_FLAG: // No flag.
233   // These affect the name of the symbol, not any suffix.
234   case X86II::MO_DARWIN_NONLAZY:
235   case X86II::MO_DLLIMPORT:
236   case X86II::MO_COFFSTUB:
237     break;
238 
239   case X86II::MO_TLVP:
240     RefKind = MCSymbolRefExpr::VK_TLVP;
241     break;
242   case X86II::MO_TLVP_PIC_BASE:
243     Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
244     // Subtract the pic base.
245     Expr = MCBinaryExpr::createSub(
246         Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
247     break;
248   case X86II::MO_SECREL:
249     RefKind = MCSymbolRefExpr::VK_SECREL;
250     break;
251   case X86II::MO_TLSGD:
252     RefKind = MCSymbolRefExpr::VK_TLSGD;
253     break;
254   case X86II::MO_TLSLD:
255     RefKind = MCSymbolRefExpr::VK_TLSLD;
256     break;
257   case X86II::MO_TLSLDM:
258     RefKind = MCSymbolRefExpr::VK_TLSLDM;
259     break;
260   case X86II::MO_GOTTPOFF:
261     RefKind = MCSymbolRefExpr::VK_GOTTPOFF;
262     break;
263   case X86II::MO_INDNTPOFF:
264     RefKind = MCSymbolRefExpr::VK_INDNTPOFF;
265     break;
266   case X86II::MO_TPOFF:
267     RefKind = MCSymbolRefExpr::VK_TPOFF;
268     break;
269   case X86II::MO_DTPOFF:
270     RefKind = MCSymbolRefExpr::VK_DTPOFF;
271     break;
272   case X86II::MO_NTPOFF:
273     RefKind = MCSymbolRefExpr::VK_NTPOFF;
274     break;
275   case X86II::MO_GOTNTPOFF:
276     RefKind = MCSymbolRefExpr::VK_GOTNTPOFF;
277     break;
278   case X86II::MO_GOTPCREL:
279     RefKind = MCSymbolRefExpr::VK_GOTPCREL;
280     break;
281   case X86II::MO_GOTPCREL_NORELAX:
282     RefKind = MCSymbolRefExpr::VK_GOTPCREL_NORELAX;
283     break;
284   case X86II::MO_GOT:
285     RefKind = MCSymbolRefExpr::VK_GOT;
286     break;
287   case X86II::MO_GOTOFF:
288     RefKind = MCSymbolRefExpr::VK_GOTOFF;
289     break;
290   case X86II::MO_PLT:
291     RefKind = MCSymbolRefExpr::VK_PLT;
292     break;
293   case X86II::MO_ABS8:
294     RefKind = MCSymbolRefExpr::VK_X86_ABS8;
295     break;
296   case X86II::MO_PIC_BASE_OFFSET:
297   case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
298     Expr = MCSymbolRefExpr::create(Sym, Ctx);
299     // Subtract the pic base.
300     Expr = MCBinaryExpr::createSub(
301         Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
302     if (MO.isJTI()) {
303       assert(MAI.doesSetDirectiveSuppressReloc());
304       // If .set directive is supported, use it to reduce the number of
305       // relocations the assembler will generate for differences between
306       // local labels. This is only safe when the symbols are in the same
307       // section so we are restricting it to jumptable references.
308       MCSymbol *Label = Ctx.createTempSymbol();
309       AsmPrinter.OutStreamer->emitAssignment(Label, Expr);
310       Expr = MCSymbolRefExpr::create(Label, Ctx);
311     }
312     break;
313   }
314 
315   if (!Expr)
316     Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx);
317 
318   if (!MO.isJTI() && !MO.isMBB() && MO.getOffset())
319     Expr = MCBinaryExpr::createAdd(
320         Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
321   return MCOperand::createExpr(Expr);
322 }
323 
324 /// Simplify FOO $imm, %{al,ax,eax,rax} to FOO $imm, for instruction with
325 /// a short fixed-register form.
326 static void SimplifyShortImmForm(MCInst &Inst, unsigned Opcode) {
327   unsigned ImmOp = Inst.getNumOperands() - 1;
328   assert(Inst.getOperand(0).isReg() &&
329          (Inst.getOperand(ImmOp).isImm() || Inst.getOperand(ImmOp).isExpr()) &&
330          ((Inst.getNumOperands() == 3 && Inst.getOperand(1).isReg() &&
331            Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) ||
332           Inst.getNumOperands() == 2) &&
333          "Unexpected instruction!");
334 
335   // Check whether the destination register can be fixed.
336   unsigned Reg = Inst.getOperand(0).getReg();
337   if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
338     return;
339 
340   // If so, rewrite the instruction.
341   MCOperand Saved = Inst.getOperand(ImmOp);
342   Inst = MCInst();
343   Inst.setOpcode(Opcode);
344   Inst.addOperand(Saved);
345 }
346 
347 /// If a movsx instruction has a shorter encoding for the used register
348 /// simplify the instruction to use it instead.
349 static void SimplifyMOVSX(MCInst &Inst) {
350   unsigned NewOpcode = 0;
351   unsigned Op0 = Inst.getOperand(0).getReg(), Op1 = Inst.getOperand(1).getReg();
352   switch (Inst.getOpcode()) {
353   default:
354     llvm_unreachable("Unexpected instruction!");
355   case X86::MOVSX16rr8: // movsbw %al, %ax   --> cbtw
356     if (Op0 == X86::AX && Op1 == X86::AL)
357       NewOpcode = X86::CBW;
358     break;
359   case X86::MOVSX32rr16: // movswl %ax, %eax  --> cwtl
360     if (Op0 == X86::EAX && Op1 == X86::AX)
361       NewOpcode = X86::CWDE;
362     break;
363   case X86::MOVSX64rr32: // movslq %eax, %rax --> cltq
364     if (Op0 == X86::RAX && Op1 == X86::EAX)
365       NewOpcode = X86::CDQE;
366     break;
367   }
368 
369   if (NewOpcode != 0) {
370     Inst = MCInst();
371     Inst.setOpcode(NewOpcode);
372   }
373 }
374 
375 /// Simplify things like MOV32rm to MOV32o32a.
376 static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst,
377                                   unsigned Opcode) {
378   // Don't make these simplifications in 64-bit mode; other assemblers don't
379   // perform them because they make the code larger.
380   if (Printer.getSubtarget().is64Bit())
381     return;
382 
383   bool IsStore = Inst.getOperand(0).isReg() && Inst.getOperand(1).isReg();
384   unsigned AddrBase = IsStore;
385   unsigned RegOp = IsStore ? 0 : 5;
386   unsigned AddrOp = AddrBase + 3;
387   assert(
388       Inst.getNumOperands() == 6 && Inst.getOperand(RegOp).isReg() &&
389       Inst.getOperand(AddrBase + X86::AddrBaseReg).isReg() &&
390       Inst.getOperand(AddrBase + X86::AddrScaleAmt).isImm() &&
391       Inst.getOperand(AddrBase + X86::AddrIndexReg).isReg() &&
392       Inst.getOperand(AddrBase + X86::AddrSegmentReg).isReg() &&
393       (Inst.getOperand(AddrOp).isExpr() || Inst.getOperand(AddrOp).isImm()) &&
394       "Unexpected instruction!");
395 
396   // Check whether the destination register can be fixed.
397   unsigned Reg = Inst.getOperand(RegOp).getReg();
398   if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
399     return;
400 
401   // Check whether this is an absolute address.
402   // FIXME: We know TLVP symbol refs aren't, but there should be a better way
403   // to do this here.
404   bool Absolute = true;
405   if (Inst.getOperand(AddrOp).isExpr()) {
406     const MCExpr *MCE = Inst.getOperand(AddrOp).getExpr();
407     if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(MCE))
408       if (SRE->getKind() == MCSymbolRefExpr::VK_TLVP)
409         Absolute = false;
410   }
411 
412   if (Absolute &&
413       (Inst.getOperand(AddrBase + X86::AddrBaseReg).getReg() != 0 ||
414        Inst.getOperand(AddrBase + X86::AddrScaleAmt).getImm() != 1 ||
415        Inst.getOperand(AddrBase + X86::AddrIndexReg).getReg() != 0))
416     return;
417 
418   // If so, rewrite the instruction.
419   MCOperand Saved = Inst.getOperand(AddrOp);
420   MCOperand Seg = Inst.getOperand(AddrBase + X86::AddrSegmentReg);
421   Inst = MCInst();
422   Inst.setOpcode(Opcode);
423   Inst.addOperand(Saved);
424   Inst.addOperand(Seg);
425 }
426 
427 static unsigned getRetOpcode(const X86Subtarget &Subtarget) {
428   return Subtarget.is64Bit() ? X86::RET64 : X86::RET32;
429 }
430 
431 Optional<MCOperand>
432 X86MCInstLower::LowerMachineOperand(const MachineInstr *MI,
433                                     const MachineOperand &MO) const {
434   switch (MO.getType()) {
435   default:
436     MI->print(errs());
437     llvm_unreachable("unknown operand type");
438   case MachineOperand::MO_Register:
439     // Ignore all implicit register operands.
440     if (MO.isImplicit())
441       return None;
442     return MCOperand::createReg(MO.getReg());
443   case MachineOperand::MO_Immediate:
444     return MCOperand::createImm(MO.getImm());
445   case MachineOperand::MO_MachineBasicBlock:
446   case MachineOperand::MO_GlobalAddress:
447   case MachineOperand::MO_ExternalSymbol:
448     return LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
449   case MachineOperand::MO_MCSymbol:
450     return LowerSymbolOperand(MO, MO.getMCSymbol());
451   case MachineOperand::MO_JumpTableIndex:
452     return LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()));
453   case MachineOperand::MO_ConstantPoolIndex:
454     return LowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex()));
455   case MachineOperand::MO_BlockAddress:
456     return LowerSymbolOperand(
457         MO, AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()));
458   case MachineOperand::MO_RegisterMask:
459     // Ignore call clobbers.
460     return None;
461   }
462 }
463 
464 // Replace TAILJMP opcodes with their equivalent opcodes that have encoding
465 // information.
466 static unsigned convertTailJumpOpcode(unsigned Opcode) {
467   switch (Opcode) {
468   case X86::TAILJMPr:
469     Opcode = X86::JMP32r;
470     break;
471   case X86::TAILJMPm:
472     Opcode = X86::JMP32m;
473     break;
474   case X86::TAILJMPr64:
475     Opcode = X86::JMP64r;
476     break;
477   case X86::TAILJMPm64:
478     Opcode = X86::JMP64m;
479     break;
480   case X86::TAILJMPr64_REX:
481     Opcode = X86::JMP64r_REX;
482     break;
483   case X86::TAILJMPm64_REX:
484     Opcode = X86::JMP64m_REX;
485     break;
486   case X86::TAILJMPd:
487   case X86::TAILJMPd64:
488     Opcode = X86::JMP_1;
489     break;
490   case X86::TAILJMPd_CC:
491   case X86::TAILJMPd64_CC:
492     Opcode = X86::JCC_1;
493     break;
494   }
495 
496   return Opcode;
497 }
498 
499 void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
500   OutMI.setOpcode(MI->getOpcode());
501 
502   for (const MachineOperand &MO : MI->operands())
503     if (auto MaybeMCOp = LowerMachineOperand(MI, MO))
504       OutMI.addOperand(*MaybeMCOp);
505 
506   // Handle a few special cases to eliminate operand modifiers.
507   switch (OutMI.getOpcode()) {
508   case X86::LEA64_32r:
509   case X86::LEA64r:
510   case X86::LEA16r:
511   case X86::LEA32r:
512     // LEA should have a segment register, but it must be empty.
513     assert(OutMI.getNumOperands() == 1 + X86::AddrNumOperands &&
514            "Unexpected # of LEA operands");
515     assert(OutMI.getOperand(1 + X86::AddrSegmentReg).getReg() == 0 &&
516            "LEA has segment specified!");
517     break;
518 
519   case X86::MULX32Hrr:
520   case X86::MULX32Hrm:
521   case X86::MULX64Hrr:
522   case X86::MULX64Hrm: {
523     // Turn into regular MULX by duplicating the destination.
524     unsigned NewOpc;
525     switch (OutMI.getOpcode()) {
526     default: llvm_unreachable("Invalid opcode");
527     case X86::MULX32Hrr: NewOpc = X86::MULX32rr; break;
528     case X86::MULX32Hrm: NewOpc = X86::MULX32rm; break;
529     case X86::MULX64Hrr: NewOpc = X86::MULX64rr; break;
530     case X86::MULX64Hrm: NewOpc = X86::MULX64rm; break;
531     }
532     OutMI.setOpcode(NewOpc);
533     // Duplicate the destination.
534     unsigned DestReg = OutMI.getOperand(0).getReg();
535     OutMI.insert(OutMI.begin(), MCOperand::createReg(DestReg));
536     break;
537   }
538 
539   // Commute operands to get a smaller encoding by using VEX.R instead of VEX.B
540   // if one of the registers is extended, but other isn't.
541   case X86::VMOVZPQILo2PQIrr:
542   case X86::VMOVAPDrr:
543   case X86::VMOVAPDYrr:
544   case X86::VMOVAPSrr:
545   case X86::VMOVAPSYrr:
546   case X86::VMOVDQArr:
547   case X86::VMOVDQAYrr:
548   case X86::VMOVDQUrr:
549   case X86::VMOVDQUYrr:
550   case X86::VMOVUPDrr:
551   case X86::VMOVUPDYrr:
552   case X86::VMOVUPSrr:
553   case X86::VMOVUPSYrr: {
554     if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
555         X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg())) {
556       unsigned NewOpc;
557       switch (OutMI.getOpcode()) {
558       default: llvm_unreachable("Invalid opcode");
559       case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr;   break;
560       case X86::VMOVAPDrr:        NewOpc = X86::VMOVAPDrr_REV;  break;
561       case X86::VMOVAPDYrr:       NewOpc = X86::VMOVAPDYrr_REV; break;
562       case X86::VMOVAPSrr:        NewOpc = X86::VMOVAPSrr_REV;  break;
563       case X86::VMOVAPSYrr:       NewOpc = X86::VMOVAPSYrr_REV; break;
564       case X86::VMOVDQArr:        NewOpc = X86::VMOVDQArr_REV;  break;
565       case X86::VMOVDQAYrr:       NewOpc = X86::VMOVDQAYrr_REV; break;
566       case X86::VMOVDQUrr:        NewOpc = X86::VMOVDQUrr_REV;  break;
567       case X86::VMOVDQUYrr:       NewOpc = X86::VMOVDQUYrr_REV; break;
568       case X86::VMOVUPDrr:        NewOpc = X86::VMOVUPDrr_REV;  break;
569       case X86::VMOVUPDYrr:       NewOpc = X86::VMOVUPDYrr_REV; break;
570       case X86::VMOVUPSrr:        NewOpc = X86::VMOVUPSrr_REV;  break;
571       case X86::VMOVUPSYrr:       NewOpc = X86::VMOVUPSYrr_REV; break;
572       }
573       OutMI.setOpcode(NewOpc);
574     }
575     break;
576   }
577   case X86::VMOVSDrr:
578   case X86::VMOVSSrr: {
579     if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
580         X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) {
581       unsigned NewOpc;
582       switch (OutMI.getOpcode()) {
583       default: llvm_unreachable("Invalid opcode");
584       case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
585       case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
586       }
587       OutMI.setOpcode(NewOpc);
588     }
589     break;
590   }
591 
592   case X86::VPCMPBZ128rmi:  case X86::VPCMPBZ128rmik:
593   case X86::VPCMPBZ128rri:  case X86::VPCMPBZ128rrik:
594   case X86::VPCMPBZ256rmi:  case X86::VPCMPBZ256rmik:
595   case X86::VPCMPBZ256rri:  case X86::VPCMPBZ256rrik:
596   case X86::VPCMPBZrmi:     case X86::VPCMPBZrmik:
597   case X86::VPCMPBZrri:     case X86::VPCMPBZrrik:
598   case X86::VPCMPDZ128rmi:  case X86::VPCMPDZ128rmik:
599   case X86::VPCMPDZ128rmib: case X86::VPCMPDZ128rmibk:
600   case X86::VPCMPDZ128rri:  case X86::VPCMPDZ128rrik:
601   case X86::VPCMPDZ256rmi:  case X86::VPCMPDZ256rmik:
602   case X86::VPCMPDZ256rmib: case X86::VPCMPDZ256rmibk:
603   case X86::VPCMPDZ256rri:  case X86::VPCMPDZ256rrik:
604   case X86::VPCMPDZrmi:     case X86::VPCMPDZrmik:
605   case X86::VPCMPDZrmib:    case X86::VPCMPDZrmibk:
606   case X86::VPCMPDZrri:     case X86::VPCMPDZrrik:
607   case X86::VPCMPQZ128rmi:  case X86::VPCMPQZ128rmik:
608   case X86::VPCMPQZ128rmib: case X86::VPCMPQZ128rmibk:
609   case X86::VPCMPQZ128rri:  case X86::VPCMPQZ128rrik:
610   case X86::VPCMPQZ256rmi:  case X86::VPCMPQZ256rmik:
611   case X86::VPCMPQZ256rmib: case X86::VPCMPQZ256rmibk:
612   case X86::VPCMPQZ256rri:  case X86::VPCMPQZ256rrik:
613   case X86::VPCMPQZrmi:     case X86::VPCMPQZrmik:
614   case X86::VPCMPQZrmib:    case X86::VPCMPQZrmibk:
615   case X86::VPCMPQZrri:     case X86::VPCMPQZrrik:
616   case X86::VPCMPWZ128rmi:  case X86::VPCMPWZ128rmik:
617   case X86::VPCMPWZ128rri:  case X86::VPCMPWZ128rrik:
618   case X86::VPCMPWZ256rmi:  case X86::VPCMPWZ256rmik:
619   case X86::VPCMPWZ256rri:  case X86::VPCMPWZ256rrik:
620   case X86::VPCMPWZrmi:     case X86::VPCMPWZrmik:
621   case X86::VPCMPWZrri:     case X86::VPCMPWZrrik: {
622     // Turn immediate 0 into the VPCMPEQ instruction.
623     if (OutMI.getOperand(OutMI.getNumOperands() - 1).getImm() == 0) {
624       unsigned NewOpc;
625       switch (OutMI.getOpcode()) {
626       default: llvm_unreachable("Invalid opcode");
627       case X86::VPCMPBZ128rmi:   NewOpc = X86::VPCMPEQBZ128rm;   break;
628       case X86::VPCMPBZ128rmik:  NewOpc = X86::VPCMPEQBZ128rmk;  break;
629       case X86::VPCMPBZ128rri:   NewOpc = X86::VPCMPEQBZ128rr;   break;
630       case X86::VPCMPBZ128rrik:  NewOpc = X86::VPCMPEQBZ128rrk;  break;
631       case X86::VPCMPBZ256rmi:   NewOpc = X86::VPCMPEQBZ256rm;   break;
632       case X86::VPCMPBZ256rmik:  NewOpc = X86::VPCMPEQBZ256rmk;  break;
633       case X86::VPCMPBZ256rri:   NewOpc = X86::VPCMPEQBZ256rr;   break;
634       case X86::VPCMPBZ256rrik:  NewOpc = X86::VPCMPEQBZ256rrk;  break;
635       case X86::VPCMPBZrmi:      NewOpc = X86::VPCMPEQBZrm;      break;
636       case X86::VPCMPBZrmik:     NewOpc = X86::VPCMPEQBZrmk;     break;
637       case X86::VPCMPBZrri:      NewOpc = X86::VPCMPEQBZrr;      break;
638       case X86::VPCMPBZrrik:     NewOpc = X86::VPCMPEQBZrrk;     break;
639       case X86::VPCMPDZ128rmi:   NewOpc = X86::VPCMPEQDZ128rm;   break;
640       case X86::VPCMPDZ128rmib:  NewOpc = X86::VPCMPEQDZ128rmb;  break;
641       case X86::VPCMPDZ128rmibk: NewOpc = X86::VPCMPEQDZ128rmbk; break;
642       case X86::VPCMPDZ128rmik:  NewOpc = X86::VPCMPEQDZ128rmk;  break;
643       case X86::VPCMPDZ128rri:   NewOpc = X86::VPCMPEQDZ128rr;   break;
644       case X86::VPCMPDZ128rrik:  NewOpc = X86::VPCMPEQDZ128rrk;  break;
645       case X86::VPCMPDZ256rmi:   NewOpc = X86::VPCMPEQDZ256rm;   break;
646       case X86::VPCMPDZ256rmib:  NewOpc = X86::VPCMPEQDZ256rmb;  break;
647       case X86::VPCMPDZ256rmibk: NewOpc = X86::VPCMPEQDZ256rmbk; break;
648       case X86::VPCMPDZ256rmik:  NewOpc = X86::VPCMPEQDZ256rmk;  break;
649       case X86::VPCMPDZ256rri:   NewOpc = X86::VPCMPEQDZ256rr;   break;
650       case X86::VPCMPDZ256rrik:  NewOpc = X86::VPCMPEQDZ256rrk;  break;
651       case X86::VPCMPDZrmi:      NewOpc = X86::VPCMPEQDZrm;      break;
652       case X86::VPCMPDZrmib:     NewOpc = X86::VPCMPEQDZrmb;     break;
653       case X86::VPCMPDZrmibk:    NewOpc = X86::VPCMPEQDZrmbk;    break;
654       case X86::VPCMPDZrmik:     NewOpc = X86::VPCMPEQDZrmk;     break;
655       case X86::VPCMPDZrri:      NewOpc = X86::VPCMPEQDZrr;      break;
656       case X86::VPCMPDZrrik:     NewOpc = X86::VPCMPEQDZrrk;     break;
657       case X86::VPCMPQZ128rmi:   NewOpc = X86::VPCMPEQQZ128rm;   break;
658       case X86::VPCMPQZ128rmib:  NewOpc = X86::VPCMPEQQZ128rmb;  break;
659       case X86::VPCMPQZ128rmibk: NewOpc = X86::VPCMPEQQZ128rmbk; break;
660       case X86::VPCMPQZ128rmik:  NewOpc = X86::VPCMPEQQZ128rmk;  break;
661       case X86::VPCMPQZ128rri:   NewOpc = X86::VPCMPEQQZ128rr;   break;
662       case X86::VPCMPQZ128rrik:  NewOpc = X86::VPCMPEQQZ128rrk;  break;
663       case X86::VPCMPQZ256rmi:   NewOpc = X86::VPCMPEQQZ256rm;   break;
664       case X86::VPCMPQZ256rmib:  NewOpc = X86::VPCMPEQQZ256rmb;  break;
665       case X86::VPCMPQZ256rmibk: NewOpc = X86::VPCMPEQQZ256rmbk; break;
666       case X86::VPCMPQZ256rmik:  NewOpc = X86::VPCMPEQQZ256rmk;  break;
667       case X86::VPCMPQZ256rri:   NewOpc = X86::VPCMPEQQZ256rr;   break;
668       case X86::VPCMPQZ256rrik:  NewOpc = X86::VPCMPEQQZ256rrk;  break;
669       case X86::VPCMPQZrmi:      NewOpc = X86::VPCMPEQQZrm;      break;
670       case X86::VPCMPQZrmib:     NewOpc = X86::VPCMPEQQZrmb;     break;
671       case X86::VPCMPQZrmibk:    NewOpc = X86::VPCMPEQQZrmbk;    break;
672       case X86::VPCMPQZrmik:     NewOpc = X86::VPCMPEQQZrmk;     break;
673       case X86::VPCMPQZrri:      NewOpc = X86::VPCMPEQQZrr;      break;
674       case X86::VPCMPQZrrik:     NewOpc = X86::VPCMPEQQZrrk;     break;
675       case X86::VPCMPWZ128rmi:   NewOpc = X86::VPCMPEQWZ128rm;   break;
676       case X86::VPCMPWZ128rmik:  NewOpc = X86::VPCMPEQWZ128rmk;  break;
677       case X86::VPCMPWZ128rri:   NewOpc = X86::VPCMPEQWZ128rr;   break;
678       case X86::VPCMPWZ128rrik:  NewOpc = X86::VPCMPEQWZ128rrk;  break;
679       case X86::VPCMPWZ256rmi:   NewOpc = X86::VPCMPEQWZ256rm;   break;
680       case X86::VPCMPWZ256rmik:  NewOpc = X86::VPCMPEQWZ256rmk;  break;
681       case X86::VPCMPWZ256rri:   NewOpc = X86::VPCMPEQWZ256rr;   break;
682       case X86::VPCMPWZ256rrik:  NewOpc = X86::VPCMPEQWZ256rrk;  break;
683       case X86::VPCMPWZrmi:      NewOpc = X86::VPCMPEQWZrm;      break;
684       case X86::VPCMPWZrmik:     NewOpc = X86::VPCMPEQWZrmk;     break;
685       case X86::VPCMPWZrri:      NewOpc = X86::VPCMPEQWZrr;      break;
686       case X86::VPCMPWZrrik:     NewOpc = X86::VPCMPEQWZrrk;     break;
687       }
688 
689       OutMI.setOpcode(NewOpc);
690       OutMI.erase(&OutMI.getOperand(OutMI.getNumOperands() - 1));
691       break;
692     }
693 
694     // Turn immediate 6 into the VPCMPGT instruction.
695     if (OutMI.getOperand(OutMI.getNumOperands() - 1).getImm() == 6) {
696       unsigned NewOpc;
697       switch (OutMI.getOpcode()) {
698       default: llvm_unreachable("Invalid opcode");
699       case X86::VPCMPBZ128rmi:   NewOpc = X86::VPCMPGTBZ128rm;   break;
700       case X86::VPCMPBZ128rmik:  NewOpc = X86::VPCMPGTBZ128rmk;  break;
701       case X86::VPCMPBZ128rri:   NewOpc = X86::VPCMPGTBZ128rr;   break;
702       case X86::VPCMPBZ128rrik:  NewOpc = X86::VPCMPGTBZ128rrk;  break;
703       case X86::VPCMPBZ256rmi:   NewOpc = X86::VPCMPGTBZ256rm;   break;
704       case X86::VPCMPBZ256rmik:  NewOpc = X86::VPCMPGTBZ256rmk;  break;
705       case X86::VPCMPBZ256rri:   NewOpc = X86::VPCMPGTBZ256rr;   break;
706       case X86::VPCMPBZ256rrik:  NewOpc = X86::VPCMPGTBZ256rrk;  break;
707       case X86::VPCMPBZrmi:      NewOpc = X86::VPCMPGTBZrm;      break;
708       case X86::VPCMPBZrmik:     NewOpc = X86::VPCMPGTBZrmk;     break;
709       case X86::VPCMPBZrri:      NewOpc = X86::VPCMPGTBZrr;      break;
710       case X86::VPCMPBZrrik:     NewOpc = X86::VPCMPGTBZrrk;     break;
711       case X86::VPCMPDZ128rmi:   NewOpc = X86::VPCMPGTDZ128rm;   break;
712       case X86::VPCMPDZ128rmib:  NewOpc = X86::VPCMPGTDZ128rmb;  break;
713       case X86::VPCMPDZ128rmibk: NewOpc = X86::VPCMPGTDZ128rmbk; break;
714       case X86::VPCMPDZ128rmik:  NewOpc = X86::VPCMPGTDZ128rmk;  break;
715       case X86::VPCMPDZ128rri:   NewOpc = X86::VPCMPGTDZ128rr;   break;
716       case X86::VPCMPDZ128rrik:  NewOpc = X86::VPCMPGTDZ128rrk;  break;
717       case X86::VPCMPDZ256rmi:   NewOpc = X86::VPCMPGTDZ256rm;   break;
718       case X86::VPCMPDZ256rmib:  NewOpc = X86::VPCMPGTDZ256rmb;  break;
719       case X86::VPCMPDZ256rmibk: NewOpc = X86::VPCMPGTDZ256rmbk; break;
720       case X86::VPCMPDZ256rmik:  NewOpc = X86::VPCMPGTDZ256rmk;  break;
721       case X86::VPCMPDZ256rri:   NewOpc = X86::VPCMPGTDZ256rr;   break;
722       case X86::VPCMPDZ256rrik:  NewOpc = X86::VPCMPGTDZ256rrk;  break;
723       case X86::VPCMPDZrmi:      NewOpc = X86::VPCMPGTDZrm;      break;
724       case X86::VPCMPDZrmib:     NewOpc = X86::VPCMPGTDZrmb;     break;
725       case X86::VPCMPDZrmibk:    NewOpc = X86::VPCMPGTDZrmbk;    break;
726       case X86::VPCMPDZrmik:     NewOpc = X86::VPCMPGTDZrmk;     break;
727       case X86::VPCMPDZrri:      NewOpc = X86::VPCMPGTDZrr;      break;
728       case X86::VPCMPDZrrik:     NewOpc = X86::VPCMPGTDZrrk;     break;
729       case X86::VPCMPQZ128rmi:   NewOpc = X86::VPCMPGTQZ128rm;   break;
730       case X86::VPCMPQZ128rmib:  NewOpc = X86::VPCMPGTQZ128rmb;  break;
731       case X86::VPCMPQZ128rmibk: NewOpc = X86::VPCMPGTQZ128rmbk; break;
732       case X86::VPCMPQZ128rmik:  NewOpc = X86::VPCMPGTQZ128rmk;  break;
733       case X86::VPCMPQZ128rri:   NewOpc = X86::VPCMPGTQZ128rr;   break;
734       case X86::VPCMPQZ128rrik:  NewOpc = X86::VPCMPGTQZ128rrk;  break;
735       case X86::VPCMPQZ256rmi:   NewOpc = X86::VPCMPGTQZ256rm;   break;
736       case X86::VPCMPQZ256rmib:  NewOpc = X86::VPCMPGTQZ256rmb;  break;
737       case X86::VPCMPQZ256rmibk: NewOpc = X86::VPCMPGTQZ256rmbk; break;
738       case X86::VPCMPQZ256rmik:  NewOpc = X86::VPCMPGTQZ256rmk;  break;
739       case X86::VPCMPQZ256rri:   NewOpc = X86::VPCMPGTQZ256rr;   break;
740       case X86::VPCMPQZ256rrik:  NewOpc = X86::VPCMPGTQZ256rrk;  break;
741       case X86::VPCMPQZrmi:      NewOpc = X86::VPCMPGTQZrm;      break;
742       case X86::VPCMPQZrmib:     NewOpc = X86::VPCMPGTQZrmb;     break;
743       case X86::VPCMPQZrmibk:    NewOpc = X86::VPCMPGTQZrmbk;    break;
744       case X86::VPCMPQZrmik:     NewOpc = X86::VPCMPGTQZrmk;     break;
745       case X86::VPCMPQZrri:      NewOpc = X86::VPCMPGTQZrr;      break;
746       case X86::VPCMPQZrrik:     NewOpc = X86::VPCMPGTQZrrk;     break;
747       case X86::VPCMPWZ128rmi:   NewOpc = X86::VPCMPGTWZ128rm;   break;
748       case X86::VPCMPWZ128rmik:  NewOpc = X86::VPCMPGTWZ128rmk;  break;
749       case X86::VPCMPWZ128rri:   NewOpc = X86::VPCMPGTWZ128rr;   break;
750       case X86::VPCMPWZ128rrik:  NewOpc = X86::VPCMPGTWZ128rrk;  break;
751       case X86::VPCMPWZ256rmi:   NewOpc = X86::VPCMPGTWZ256rm;   break;
752       case X86::VPCMPWZ256rmik:  NewOpc = X86::VPCMPGTWZ256rmk;  break;
753       case X86::VPCMPWZ256rri:   NewOpc = X86::VPCMPGTWZ256rr;   break;
754       case X86::VPCMPWZ256rrik:  NewOpc = X86::VPCMPGTWZ256rrk;  break;
755       case X86::VPCMPWZrmi:      NewOpc = X86::VPCMPGTWZrm;      break;
756       case X86::VPCMPWZrmik:     NewOpc = X86::VPCMPGTWZrmk;     break;
757       case X86::VPCMPWZrri:      NewOpc = X86::VPCMPGTWZrr;      break;
758       case X86::VPCMPWZrrik:     NewOpc = X86::VPCMPGTWZrrk;     break;
759       }
760 
761       OutMI.setOpcode(NewOpc);
762       OutMI.erase(&OutMI.getOperand(OutMI.getNumOperands() - 1));
763       break;
764     }
765 
766     break;
767   }
768 
769   // CALL64r, CALL64pcrel32 - These instructions used to have
770   // register inputs modeled as normal uses instead of implicit uses.  As such,
771   // they we used to truncate off all but the first operand (the callee). This
772   // issue seems to have been fixed at some point. This assert verifies that.
773   case X86::CALL64r:
774   case X86::CALL64pcrel32:
775     assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
776     break;
777 
778   case X86::EH_RETURN:
779   case X86::EH_RETURN64: {
780     OutMI = MCInst();
781     OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
782     break;
783   }
784 
785   case X86::CLEANUPRET: {
786     // Replace CLEANUPRET with the appropriate RET.
787     OutMI = MCInst();
788     OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
789     break;
790   }
791 
792   case X86::CATCHRET: {
793     // Replace CATCHRET with the appropriate RET.
794     const X86Subtarget &Subtarget = AsmPrinter.getSubtarget();
795     unsigned ReturnReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
796     OutMI = MCInst();
797     OutMI.setOpcode(getRetOpcode(Subtarget));
798     OutMI.addOperand(MCOperand::createReg(ReturnReg));
799     break;
800   }
801 
802   // TAILJMPd, TAILJMPd64, TailJMPd_cc - Lower to the correct jump
803   // instruction.
804   case X86::TAILJMPr:
805   case X86::TAILJMPr64:
806   case X86::TAILJMPr64_REX:
807   case X86::TAILJMPd:
808   case X86::TAILJMPd64:
809     assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
810     OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
811     break;
812 
813   case X86::TAILJMPd_CC:
814   case X86::TAILJMPd64_CC:
815     assert(OutMI.getNumOperands() == 2 && "Unexpected number of operands!");
816     OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
817     break;
818 
819   case X86::TAILJMPm:
820   case X86::TAILJMPm64:
821   case X86::TAILJMPm64_REX:
822     assert(OutMI.getNumOperands() == X86::AddrNumOperands &&
823            "Unexpected number of operands!");
824     OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
825     break;
826 
827   case X86::DEC16r:
828   case X86::DEC32r:
829   case X86::INC16r:
830   case X86::INC32r:
831     // If we aren't in 64-bit mode we can use the 1-byte inc/dec instructions.
832     if (!AsmPrinter.getSubtarget().is64Bit()) {
833       unsigned Opcode;
834       switch (OutMI.getOpcode()) {
835       default: llvm_unreachable("Invalid opcode");
836       case X86::DEC16r: Opcode = X86::DEC16r_alt; break;
837       case X86::DEC32r: Opcode = X86::DEC32r_alt; break;
838       case X86::INC16r: Opcode = X86::INC16r_alt; break;
839       case X86::INC32r: Opcode = X86::INC32r_alt; break;
840       }
841       OutMI.setOpcode(Opcode);
842     }
843     break;
844 
845   // We don't currently select the correct instruction form for instructions
846   // which have a short %eax, etc. form. Handle this by custom lowering, for
847   // now.
848   //
849   // Note, we are currently not handling the following instructions:
850   // MOV64ao8, MOV64o8a
851   // XCHG16ar, XCHG32ar, XCHG64ar
852   case X86::MOV8mr_NOREX:
853   case X86::MOV8mr:
854   case X86::MOV8rm_NOREX:
855   case X86::MOV8rm:
856   case X86::MOV16mr:
857   case X86::MOV16rm:
858   case X86::MOV32mr:
859   case X86::MOV32rm: {
860     unsigned NewOpc;
861     switch (OutMI.getOpcode()) {
862     default: llvm_unreachable("Invalid opcode");
863     case X86::MOV8mr_NOREX:
864     case X86::MOV8mr:  NewOpc = X86::MOV8o32a; break;
865     case X86::MOV8rm_NOREX:
866     case X86::MOV8rm:  NewOpc = X86::MOV8ao32; break;
867     case X86::MOV16mr: NewOpc = X86::MOV16o32a; break;
868     case X86::MOV16rm: NewOpc = X86::MOV16ao32; break;
869     case X86::MOV32mr: NewOpc = X86::MOV32o32a; break;
870     case X86::MOV32rm: NewOpc = X86::MOV32ao32; break;
871     }
872     SimplifyShortMoveForm(AsmPrinter, OutMI, NewOpc);
873     break;
874   }
875 
876   case X86::ADC8ri: case X86::ADC16ri: case X86::ADC32ri: case X86::ADC64ri32:
877   case X86::ADD8ri: case X86::ADD16ri: case X86::ADD32ri: case X86::ADD64ri32:
878   case X86::AND8ri: case X86::AND16ri: case X86::AND32ri: case X86::AND64ri32:
879   case X86::CMP8ri: case X86::CMP16ri: case X86::CMP32ri: case X86::CMP64ri32:
880   case X86::OR8ri:  case X86::OR16ri:  case X86::OR32ri:  case X86::OR64ri32:
881   case X86::SBB8ri: case X86::SBB16ri: case X86::SBB32ri: case X86::SBB64ri32:
882   case X86::SUB8ri: case X86::SUB16ri: case X86::SUB32ri: case X86::SUB64ri32:
883   case X86::TEST8ri:case X86::TEST16ri:case X86::TEST32ri:case X86::TEST64ri32:
884   case X86::XOR8ri: case X86::XOR16ri: case X86::XOR32ri: case X86::XOR64ri32: {
885     unsigned NewOpc;
886     switch (OutMI.getOpcode()) {
887     default: llvm_unreachable("Invalid opcode");
888     case X86::ADC8ri:     NewOpc = X86::ADC8i8;    break;
889     case X86::ADC16ri:    NewOpc = X86::ADC16i16;  break;
890     case X86::ADC32ri:    NewOpc = X86::ADC32i32;  break;
891     case X86::ADC64ri32:  NewOpc = X86::ADC64i32;  break;
892     case X86::ADD8ri:     NewOpc = X86::ADD8i8;    break;
893     case X86::ADD16ri:    NewOpc = X86::ADD16i16;  break;
894     case X86::ADD32ri:    NewOpc = X86::ADD32i32;  break;
895     case X86::ADD64ri32:  NewOpc = X86::ADD64i32;  break;
896     case X86::AND8ri:     NewOpc = X86::AND8i8;    break;
897     case X86::AND16ri:    NewOpc = X86::AND16i16;  break;
898     case X86::AND32ri:    NewOpc = X86::AND32i32;  break;
899     case X86::AND64ri32:  NewOpc = X86::AND64i32;  break;
900     case X86::CMP8ri:     NewOpc = X86::CMP8i8;    break;
901     case X86::CMP16ri:    NewOpc = X86::CMP16i16;  break;
902     case X86::CMP32ri:    NewOpc = X86::CMP32i32;  break;
903     case X86::CMP64ri32:  NewOpc = X86::CMP64i32;  break;
904     case X86::OR8ri:      NewOpc = X86::OR8i8;     break;
905     case X86::OR16ri:     NewOpc = X86::OR16i16;   break;
906     case X86::OR32ri:     NewOpc = X86::OR32i32;   break;
907     case X86::OR64ri32:   NewOpc = X86::OR64i32;   break;
908     case X86::SBB8ri:     NewOpc = X86::SBB8i8;    break;
909     case X86::SBB16ri:    NewOpc = X86::SBB16i16;  break;
910     case X86::SBB32ri:    NewOpc = X86::SBB32i32;  break;
911     case X86::SBB64ri32:  NewOpc = X86::SBB64i32;  break;
912     case X86::SUB8ri:     NewOpc = X86::SUB8i8;    break;
913     case X86::SUB16ri:    NewOpc = X86::SUB16i16;  break;
914     case X86::SUB32ri:    NewOpc = X86::SUB32i32;  break;
915     case X86::SUB64ri32:  NewOpc = X86::SUB64i32;  break;
916     case X86::TEST8ri:    NewOpc = X86::TEST8i8;   break;
917     case X86::TEST16ri:   NewOpc = X86::TEST16i16; break;
918     case X86::TEST32ri:   NewOpc = X86::TEST32i32; break;
919     case X86::TEST64ri32: NewOpc = X86::TEST64i32; break;
920     case X86::XOR8ri:     NewOpc = X86::XOR8i8;    break;
921     case X86::XOR16ri:    NewOpc = X86::XOR16i16;  break;
922     case X86::XOR32ri:    NewOpc = X86::XOR32i32;  break;
923     case X86::XOR64ri32:  NewOpc = X86::XOR64i32;  break;
924     }
925     SimplifyShortImmForm(OutMI, NewOpc);
926     break;
927   }
928 
929   // Try to shrink some forms of movsx.
930   case X86::MOVSX16rr8:
931   case X86::MOVSX32rr16:
932   case X86::MOVSX64rr32:
933     SimplifyMOVSX(OutMI);
934     break;
935 
936   case X86::VCMPPDrri:
937   case X86::VCMPPDYrri:
938   case X86::VCMPPSrri:
939   case X86::VCMPPSYrri:
940   case X86::VCMPSDrr:
941   case X86::VCMPSSrr: {
942     // Swap the operands if it will enable a 2 byte VEX encoding.
943     // FIXME: Change the immediate to improve opportunities?
944     if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg()) &&
945         X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) {
946       unsigned Imm = MI->getOperand(3).getImm() & 0x7;
947       switch (Imm) {
948       default: break;
949       case 0x00: // EQUAL
950       case 0x03: // UNORDERED
951       case 0x04: // NOT EQUAL
952       case 0x07: // ORDERED
953         std::swap(OutMI.getOperand(1), OutMI.getOperand(2));
954         break;
955       }
956     }
957     break;
958   }
959 
960   case X86::VMOVHLPSrr:
961   case X86::VUNPCKHPDrr:
962     // These are not truly commutable so hide them from the default case.
963     break;
964 
965   case X86::MASKMOVDQU:
966   case X86::VMASKMOVDQU:
967     if (AsmPrinter.getSubtarget().is64Bit())
968       OutMI.setFlags(X86::IP_HAS_AD_SIZE);
969     break;
970 
971   default: {
972     // If the instruction is a commutable arithmetic instruction we might be
973     // able to commute the operands to get a 2 byte VEX prefix.
974     uint64_t TSFlags = MI->getDesc().TSFlags;
975     if (MI->getDesc().isCommutable() &&
976         (TSFlags & X86II::EncodingMask) == X86II::VEX &&
977         (TSFlags & X86II::OpMapMask) == X86II::TB &&
978         (TSFlags & X86II::FormMask) == X86II::MRMSrcReg &&
979         !(TSFlags & X86II::VEX_W) && (TSFlags & X86II::VEX_4V) &&
980         OutMI.getNumOperands() == 3) {
981       if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg()) &&
982           X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg()))
983         std::swap(OutMI.getOperand(1), OutMI.getOperand(2));
984     }
985     break;
986   }
987   }
988 }
989 
990 void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
991                                  const MachineInstr &MI) {
992   NoAutoPaddingScope NoPadScope(*OutStreamer);
993   bool Is64Bits = MI.getOpcode() != X86::TLS_addr32 &&
994                   MI.getOpcode() != X86::TLS_base_addr32;
995   bool Is64BitsLP64 = MI.getOpcode() == X86::TLS_addr64 ||
996                       MI.getOpcode() == X86::TLS_base_addr64;
997   MCContext &Ctx = OutStreamer->getContext();
998 
999   MCSymbolRefExpr::VariantKind SRVK;
1000   switch (MI.getOpcode()) {
1001   case X86::TLS_addr32:
1002   case X86::TLS_addr64:
1003   case X86::TLS_addrX32:
1004     SRVK = MCSymbolRefExpr::VK_TLSGD;
1005     break;
1006   case X86::TLS_base_addr32:
1007     SRVK = MCSymbolRefExpr::VK_TLSLDM;
1008     break;
1009   case X86::TLS_base_addr64:
1010   case X86::TLS_base_addrX32:
1011     SRVK = MCSymbolRefExpr::VK_TLSLD;
1012     break;
1013   default:
1014     llvm_unreachable("unexpected opcode");
1015   }
1016 
1017   const MCSymbolRefExpr *Sym = MCSymbolRefExpr::create(
1018       MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)), SRVK, Ctx);
1019 
1020   // As of binutils 2.32, ld has a bogus TLS relaxation error when the GD/LD
1021   // code sequence using R_X86_64_GOTPCREL (instead of R_X86_64_GOTPCRELX) is
1022   // attempted to be relaxed to IE/LE (binutils PR24784). Work around the bug by
1023   // only using GOT when GOTPCRELX is enabled.
1024   // TODO Delete the workaround when GOTPCRELX becomes commonplace.
1025   bool UseGot = MMI->getModule()->getRtLibUseGOT() &&
1026                 Ctx.getAsmInfo()->canRelaxRelocations();
1027 
1028   if (Is64Bits) {
1029     bool NeedsPadding = SRVK == MCSymbolRefExpr::VK_TLSGD;
1030     if (NeedsPadding && Is64BitsLP64)
1031       EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
1032     EmitAndCountInstruction(MCInstBuilder(X86::LEA64r)
1033                                 .addReg(X86::RDI)
1034                                 .addReg(X86::RIP)
1035                                 .addImm(1)
1036                                 .addReg(0)
1037                                 .addExpr(Sym)
1038                                 .addReg(0));
1039     const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("__tls_get_addr");
1040     if (NeedsPadding) {
1041       if (!UseGot)
1042         EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
1043       EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
1044       EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX));
1045     }
1046     if (UseGot) {
1047       const MCExpr *Expr = MCSymbolRefExpr::create(
1048           TlsGetAddr, MCSymbolRefExpr::VK_GOTPCREL, Ctx);
1049       EmitAndCountInstruction(MCInstBuilder(X86::CALL64m)
1050                                   .addReg(X86::RIP)
1051                                   .addImm(1)
1052                                   .addReg(0)
1053                                   .addExpr(Expr)
1054                                   .addReg(0));
1055     } else {
1056       EmitAndCountInstruction(
1057           MCInstBuilder(X86::CALL64pcrel32)
1058               .addExpr(MCSymbolRefExpr::create(TlsGetAddr,
1059                                                MCSymbolRefExpr::VK_PLT, Ctx)));
1060     }
1061   } else {
1062     if (SRVK == MCSymbolRefExpr::VK_TLSGD && !UseGot) {
1063       EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
1064                                   .addReg(X86::EAX)
1065                                   .addReg(0)
1066                                   .addImm(1)
1067                                   .addReg(X86::EBX)
1068                                   .addExpr(Sym)
1069                                   .addReg(0));
1070     } else {
1071       EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
1072                                   .addReg(X86::EAX)
1073                                   .addReg(X86::EBX)
1074                                   .addImm(1)
1075                                   .addReg(0)
1076                                   .addExpr(Sym)
1077                                   .addReg(0));
1078     }
1079 
1080     const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("___tls_get_addr");
1081     if (UseGot) {
1082       const MCExpr *Expr =
1083           MCSymbolRefExpr::create(TlsGetAddr, MCSymbolRefExpr::VK_GOT, Ctx);
1084       EmitAndCountInstruction(MCInstBuilder(X86::CALL32m)
1085                                   .addReg(X86::EBX)
1086                                   .addImm(1)
1087                                   .addReg(0)
1088                                   .addExpr(Expr)
1089                                   .addReg(0));
1090     } else {
1091       EmitAndCountInstruction(
1092           MCInstBuilder(X86::CALLpcrel32)
1093               .addExpr(MCSymbolRefExpr::create(TlsGetAddr,
1094                                                MCSymbolRefExpr::VK_PLT, Ctx)));
1095     }
1096   }
1097 }
1098 
1099 /// Emit the largest nop instruction smaller than or equal to \p NumBytes
1100 /// bytes.  Return the size of nop emitted.
1101 static unsigned emitNop(MCStreamer &OS, unsigned NumBytes,
1102                         const X86Subtarget *Subtarget) {
1103   // Determine the longest nop which can be efficiently decoded for the given
1104   // target cpu.  15-bytes is the longest single NOP instruction, but some
1105   // platforms can't decode the longest forms efficiently.
1106   unsigned MaxNopLength = 1;
1107   if (Subtarget->is64Bit()) {
1108     // FIXME: We can use NOOPL on 32-bit targets with FeatureNOPL, but the
1109     // IndexReg/BaseReg below need to be updated.
1110     if (Subtarget->hasFeature(X86::TuningFast7ByteNOP))
1111       MaxNopLength = 7;
1112     else if (Subtarget->hasFeature(X86::TuningFast15ByteNOP))
1113       MaxNopLength = 15;
1114     else if (Subtarget->hasFeature(X86::TuningFast11ByteNOP))
1115       MaxNopLength = 11;
1116     else
1117       MaxNopLength = 10;
1118   } if (Subtarget->is32Bit())
1119     MaxNopLength = 2;
1120 
1121   // Cap a single nop emission at the profitable value for the target
1122   NumBytes = std::min(NumBytes, MaxNopLength);
1123 
1124   unsigned NopSize;
1125   unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg;
1126   IndexReg = Displacement = SegmentReg = 0;
1127   BaseReg = X86::RAX;
1128   ScaleVal = 1;
1129   switch (NumBytes) {
1130   case 0:
1131     llvm_unreachable("Zero nops?");
1132     break;
1133   case 1:
1134     NopSize = 1;
1135     Opc = X86::NOOP;
1136     break;
1137   case 2:
1138     NopSize = 2;
1139     Opc = X86::XCHG16ar;
1140     break;
1141   case 3:
1142     NopSize = 3;
1143     Opc = X86::NOOPL;
1144     break;
1145   case 4:
1146     NopSize = 4;
1147     Opc = X86::NOOPL;
1148     Displacement = 8;
1149     break;
1150   case 5:
1151     NopSize = 5;
1152     Opc = X86::NOOPL;
1153     Displacement = 8;
1154     IndexReg = X86::RAX;
1155     break;
1156   case 6:
1157     NopSize = 6;
1158     Opc = X86::NOOPW;
1159     Displacement = 8;
1160     IndexReg = X86::RAX;
1161     break;
1162   case 7:
1163     NopSize = 7;
1164     Opc = X86::NOOPL;
1165     Displacement = 512;
1166     break;
1167   case 8:
1168     NopSize = 8;
1169     Opc = X86::NOOPL;
1170     Displacement = 512;
1171     IndexReg = X86::RAX;
1172     break;
1173   case 9:
1174     NopSize = 9;
1175     Opc = X86::NOOPW;
1176     Displacement = 512;
1177     IndexReg = X86::RAX;
1178     break;
1179   default:
1180     NopSize = 10;
1181     Opc = X86::NOOPW;
1182     Displacement = 512;
1183     IndexReg = X86::RAX;
1184     SegmentReg = X86::CS;
1185     break;
1186   }
1187 
1188   unsigned NumPrefixes = std::min(NumBytes - NopSize, 5U);
1189   NopSize += NumPrefixes;
1190   for (unsigned i = 0; i != NumPrefixes; ++i)
1191     OS.emitBytes("\x66");
1192 
1193   switch (Opc) {
1194   default: llvm_unreachable("Unexpected opcode");
1195   case X86::NOOP:
1196     OS.emitInstruction(MCInstBuilder(Opc), *Subtarget);
1197     break;
1198   case X86::XCHG16ar:
1199     OS.emitInstruction(MCInstBuilder(Opc).addReg(X86::AX).addReg(X86::AX),
1200                        *Subtarget);
1201     break;
1202   case X86::NOOPL:
1203   case X86::NOOPW:
1204     OS.emitInstruction(MCInstBuilder(Opc)
1205                            .addReg(BaseReg)
1206                            .addImm(ScaleVal)
1207                            .addReg(IndexReg)
1208                            .addImm(Displacement)
1209                            .addReg(SegmentReg),
1210                        *Subtarget);
1211     break;
1212   }
1213   assert(NopSize <= NumBytes && "We overemitted?");
1214   return NopSize;
1215 }
1216 
1217 /// Emit the optimal amount of multi-byte nops on X86.
1218 static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
1219                         const X86Subtarget *Subtarget) {
1220   unsigned NopsToEmit = NumBytes;
1221   (void)NopsToEmit;
1222   while (NumBytes) {
1223     NumBytes -= emitNop(OS, NumBytes, Subtarget);
1224     assert(NopsToEmit >= NumBytes && "Emitted more than I asked for!");
1225   }
1226 }
1227 
1228 void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
1229                                     X86MCInstLower &MCIL) {
1230   assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64");
1231 
1232   NoAutoPaddingScope NoPadScope(*OutStreamer);
1233 
1234   StatepointOpers SOpers(&MI);
1235   if (unsigned PatchBytes = SOpers.getNumPatchBytes()) {
1236     emitX86Nops(*OutStreamer, PatchBytes, Subtarget);
1237   } else {
1238     // Lower call target and choose correct opcode
1239     const MachineOperand &CallTarget = SOpers.getCallTarget();
1240     MCOperand CallTargetMCOp;
1241     unsigned CallOpcode;
1242     switch (CallTarget.getType()) {
1243     case MachineOperand::MO_GlobalAddress:
1244     case MachineOperand::MO_ExternalSymbol:
1245       CallTargetMCOp = MCIL.LowerSymbolOperand(
1246           CallTarget, MCIL.GetSymbolFromOperand(CallTarget));
1247       CallOpcode = X86::CALL64pcrel32;
1248       // Currently, we only support relative addressing with statepoints.
1249       // Otherwise, we'll need a scratch register to hold the target
1250       // address.  You'll fail asserts during load & relocation if this
1251       // symbol is to far away. (TODO: support non-relative addressing)
1252       break;
1253     case MachineOperand::MO_Immediate:
1254       CallTargetMCOp = MCOperand::createImm(CallTarget.getImm());
1255       CallOpcode = X86::CALL64pcrel32;
1256       // Currently, we only support relative addressing with statepoints.
1257       // Otherwise, we'll need a scratch register to hold the target
1258       // immediate.  You'll fail asserts during load & relocation if this
1259       // address is to far away. (TODO: support non-relative addressing)
1260       break;
1261     case MachineOperand::MO_Register:
1262       // FIXME: Add retpoline support and remove this.
1263       if (Subtarget->useIndirectThunkCalls())
1264         report_fatal_error("Lowering register statepoints with thunks not "
1265                            "yet implemented.");
1266       CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());
1267       CallOpcode = X86::CALL64r;
1268       break;
1269     default:
1270       llvm_unreachable("Unsupported operand type in statepoint call target");
1271       break;
1272     }
1273 
1274     // Emit call
1275     MCInst CallInst;
1276     CallInst.setOpcode(CallOpcode);
1277     CallInst.addOperand(CallTargetMCOp);
1278     OutStreamer->emitInstruction(CallInst, getSubtargetInfo());
1279   }
1280 
1281   // Record our statepoint node in the same section used by STACKMAP
1282   // and PATCHPOINT
1283   auto &Ctx = OutStreamer->getContext();
1284   MCSymbol *MILabel = Ctx.createTempSymbol();
1285   OutStreamer->emitLabel(MILabel);
1286   SM.recordStatepoint(*MILabel, MI);
1287 }
1288 
1289 void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
1290                                      X86MCInstLower &MCIL) {
1291   // FAULTING_LOAD_OP <def>, <faltinf type>, <MBB handler>,
1292   //                  <opcode>, <operands>
1293 
1294   NoAutoPaddingScope NoPadScope(*OutStreamer);
1295 
1296   Register DefRegister = FaultingMI.getOperand(0).getReg();
1297   FaultMaps::FaultKind FK =
1298       static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm());
1299   MCSymbol *HandlerLabel = FaultingMI.getOperand(2).getMBB()->getSymbol();
1300   unsigned Opcode = FaultingMI.getOperand(3).getImm();
1301   unsigned OperandsBeginIdx = 4;
1302 
1303   auto &Ctx = OutStreamer->getContext();
1304   MCSymbol *FaultingLabel = Ctx.createTempSymbol();
1305   OutStreamer->emitLabel(FaultingLabel);
1306 
1307   assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!");
1308   FM.recordFaultingOp(FK, FaultingLabel, HandlerLabel);
1309 
1310   MCInst MI;
1311   MI.setOpcode(Opcode);
1312 
1313   if (DefRegister != X86::NoRegister)
1314     MI.addOperand(MCOperand::createReg(DefRegister));
1315 
1316   for (auto I = FaultingMI.operands_begin() + OperandsBeginIdx,
1317             E = FaultingMI.operands_end();
1318        I != E; ++I)
1319     if (auto MaybeOperand = MCIL.LowerMachineOperand(&FaultingMI, *I))
1320       MI.addOperand(*MaybeOperand);
1321 
1322   OutStreamer->AddComment("on-fault: " + HandlerLabel->getName());
1323   OutStreamer->emitInstruction(MI, getSubtargetInfo());
1324 }
1325 
1326 void X86AsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI,
1327                                      X86MCInstLower &MCIL) {
1328   bool Is64Bits = Subtarget->is64Bit();
1329   MCContext &Ctx = OutStreamer->getContext();
1330   MCSymbol *fentry = Ctx.getOrCreateSymbol("__fentry__");
1331   const MCSymbolRefExpr *Op =
1332       MCSymbolRefExpr::create(fentry, MCSymbolRefExpr::VK_None, Ctx);
1333 
1334   EmitAndCountInstruction(
1335       MCInstBuilder(Is64Bits ? X86::CALL64pcrel32 : X86::CALLpcrel32)
1336           .addExpr(Op));
1337 }
1338 
1339 void X86AsmPrinter::LowerASAN_CHECK_MEMACCESS(const MachineInstr &MI) {
1340   // FIXME: Make this work on non-ELF.
1341   if (!TM.getTargetTriple().isOSBinFormatELF()) {
1342     report_fatal_error("llvm.asan.check.memaccess only supported on ELF");
1343     return;
1344   }
1345 
1346   const auto &Reg = MI.getOperand(0).getReg();
1347   ASanAccessInfo AccessInfo(MI.getOperand(1).getImm());
1348 
1349   uint64_t ShadowBase;
1350   int MappingScale;
1351   bool OrShadowOffset;
1352   getAddressSanitizerParams(Triple(TM.getTargetTriple()), 64,
1353                             AccessInfo.CompileKernel, &ShadowBase,
1354                             &MappingScale, &OrShadowOffset);
1355 
1356   StringRef Name = AccessInfo.IsWrite ? "store" : "load";
1357   StringRef Op = OrShadowOffset ? "or" : "add";
1358   std::string SymName = ("__asan_check_" + Name + "_" + Op + "_" +
1359                          Twine(1ULL << AccessInfo.AccessSizeIndex) + "_" +
1360                          TM.getMCRegisterInfo()->getName(Reg.asMCReg()))
1361                             .str();
1362   if (OrShadowOffset)
1363     report_fatal_error(
1364         "OrShadowOffset is not supported with optimized callbacks");
1365 
1366   EmitAndCountInstruction(
1367       MCInstBuilder(X86::CALL64pcrel32)
1368           .addExpr(MCSymbolRefExpr::create(
1369               OutContext.getOrCreateSymbol(SymName), OutContext)));
1370 }
1371 
1372 void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
1373                                       X86MCInstLower &MCIL) {
1374   // PATCHABLE_OP minsize, opcode, operands
1375 
1376   NoAutoPaddingScope NoPadScope(*OutStreamer);
1377 
1378   unsigned MinSize = MI.getOperand(0).getImm();
1379   unsigned Opcode = MI.getOperand(1).getImm();
1380 
1381   MCInst MCI;
1382   MCI.setOpcode(Opcode);
1383   for (auto &MO : drop_begin(MI.operands(), 2))
1384     if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
1385       MCI.addOperand(*MaybeOperand);
1386 
1387   SmallString<256> Code;
1388   SmallVector<MCFixup, 4> Fixups;
1389   raw_svector_ostream VecOS(Code);
1390   CodeEmitter->encodeInstruction(MCI, VecOS, Fixups, getSubtargetInfo());
1391 
1392   if (Code.size() < MinSize) {
1393     if (MinSize == 2 && Subtarget->is32Bit() &&
1394         Subtarget->isTargetWindowsMSVC() &&
1395         (Subtarget->getCPU().empty() || Subtarget->getCPU() == "pentium3")) {
1396       // For compatibilty reasons, when targetting MSVC, is is important to
1397       // generate a 'legacy' NOP in the form of a 8B FF MOV EDI, EDI. Some tools
1398       // rely specifically on this pattern to be able to patch a function.
1399       // This is only for 32-bit targets, when using /arch:IA32 or /arch:SSE.
1400       OutStreamer->emitInstruction(
1401           MCInstBuilder(X86::MOV32rr_REV).addReg(X86::EDI).addReg(X86::EDI),
1402           *Subtarget);
1403     } else if (MinSize == 2 && Opcode == X86::PUSH64r) {
1404       // This is an optimization that lets us get away without emitting a nop in
1405       // many cases.
1406       //
1407       // NB! In some cases the encoding for PUSH64r (e.g. PUSH64r %r9) takes two
1408       // bytes too, so the check on MinSize is important.
1409       MCI.setOpcode(X86::PUSH64rmr);
1410     } else {
1411       unsigned NopSize = emitNop(*OutStreamer, MinSize, Subtarget);
1412       assert(NopSize == MinSize && "Could not implement MinSize!");
1413       (void)NopSize;
1414     }
1415   }
1416 
1417   OutStreamer->emitInstruction(MCI, getSubtargetInfo());
1418 }
1419 
1420 // Lower a stackmap of the form:
1421 // <id>, <shadowBytes>, ...
1422 void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
1423   SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
1424 
1425   auto &Ctx = OutStreamer->getContext();
1426   MCSymbol *MILabel = Ctx.createTempSymbol();
1427   OutStreamer->emitLabel(MILabel);
1428 
1429   SM.recordStackMap(*MILabel, MI);
1430   unsigned NumShadowBytes = MI.getOperand(1).getImm();
1431   SMShadowTracker.reset(NumShadowBytes);
1432 }
1433 
1434 // Lower a patchpoint of the form:
1435 // [<def>], <id>, <numBytes>, <target>, <numArgs>, <cc>, ...
1436 void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
1437                                     X86MCInstLower &MCIL) {
1438   assert(Subtarget->is64Bit() && "Patchpoint currently only supports X86-64");
1439 
1440   SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
1441 
1442   NoAutoPaddingScope NoPadScope(*OutStreamer);
1443 
1444   auto &Ctx = OutStreamer->getContext();
1445   MCSymbol *MILabel = Ctx.createTempSymbol();
1446   OutStreamer->emitLabel(MILabel);
1447   SM.recordPatchPoint(*MILabel, MI);
1448 
1449   PatchPointOpers opers(&MI);
1450   unsigned ScratchIdx = opers.getNextScratchIdx();
1451   unsigned EncodedBytes = 0;
1452   const MachineOperand &CalleeMO = opers.getCallTarget();
1453 
1454   // Check for null target. If target is non-null (i.e. is non-zero or is
1455   // symbolic) then emit a call.
1456   if (!(CalleeMO.isImm() && !CalleeMO.getImm())) {
1457     MCOperand CalleeMCOp;
1458     switch (CalleeMO.getType()) {
1459     default:
1460       /// FIXME: Add a verifier check for bad callee types.
1461       llvm_unreachable("Unrecognized callee operand type.");
1462     case MachineOperand::MO_Immediate:
1463       if (CalleeMO.getImm())
1464         CalleeMCOp = MCOperand::createImm(CalleeMO.getImm());
1465       break;
1466     case MachineOperand::MO_ExternalSymbol:
1467     case MachineOperand::MO_GlobalAddress:
1468       CalleeMCOp = MCIL.LowerSymbolOperand(CalleeMO,
1469                                            MCIL.GetSymbolFromOperand(CalleeMO));
1470       break;
1471     }
1472 
1473     // Emit MOV to materialize the target address and the CALL to target.
1474     // This is encoded with 12-13 bytes, depending on which register is used.
1475     Register ScratchReg = MI.getOperand(ScratchIdx).getReg();
1476     if (X86II::isX86_64ExtendedReg(ScratchReg))
1477       EncodedBytes = 13;
1478     else
1479       EncodedBytes = 12;
1480 
1481     EmitAndCountInstruction(
1482         MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp));
1483     // FIXME: Add retpoline support and remove this.
1484     if (Subtarget->useIndirectThunkCalls())
1485       report_fatal_error(
1486           "Lowering patchpoint with thunks not yet implemented.");
1487     EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg));
1488   }
1489 
1490   // Emit padding.
1491   unsigned NumBytes = opers.getNumPatchBytes();
1492   assert(NumBytes >= EncodedBytes &&
1493          "Patchpoint can't request size less than the length of a call.");
1494 
1495   emitX86Nops(*OutStreamer, NumBytes - EncodedBytes, Subtarget);
1496 }
1497 
1498 void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
1499                                               X86MCInstLower &MCIL) {
1500   assert(Subtarget->is64Bit() && "XRay custom events only supports X86-64");
1501 
1502   NoAutoPaddingScope NoPadScope(*OutStreamer);
1503 
1504   // We want to emit the following pattern, which follows the x86 calling
1505   // convention to prepare for the trampoline call to be patched in.
1506   //
1507   //   .p2align 1, ...
1508   // .Lxray_event_sled_N:
1509   //   jmp +N                        // jump across the instrumentation sled
1510   //   ...                           // set up arguments in register
1511   //   callq __xray_CustomEvent@plt  // force dependency to symbol
1512   //   ...
1513   //   <jump here>
1514   //
1515   // After patching, it would look something like:
1516   //
1517   //   nopw (2-byte nop)
1518   //   ...
1519   //   callq __xrayCustomEvent  // already lowered
1520   //   ...
1521   //
1522   // ---
1523   // First we emit the label and the jump.
1524   auto CurSled = OutContext.createTempSymbol("xray_event_sled_", true);
1525   OutStreamer->AddComment("# XRay Custom Event Log");
1526   OutStreamer->emitCodeAlignment(2, &getSubtargetInfo());
1527   OutStreamer->emitLabel(CurSled);
1528 
1529   // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1530   // an operand (computed as an offset from the jmp instruction).
1531   // FIXME: Find another less hacky way do force the relative jump.
1532   OutStreamer->emitBinaryData("\xeb\x0f");
1533 
1534   // The default C calling convention will place two arguments into %rcx and
1535   // %rdx -- so we only work with those.
1536   const Register DestRegs[] = {X86::RDI, X86::RSI};
1537   bool UsedMask[] = {false, false};
1538   // Filled out in loop.
1539   Register SrcRegs[] = {0, 0};
1540 
1541   // Then we put the operands in the %rdi and %rsi registers. We spill the
1542   // values in the register before we clobber them, and mark them as used in
1543   // UsedMask. In case the arguments are already in the correct register, we use
1544   // emit nops appropriately sized to keep the sled the same size in every
1545   // situation.
1546   for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1547     if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
1548       assert(Op->isReg() && "Only support arguments in registers");
1549       SrcRegs[I] = getX86SubSuperRegister(Op->getReg(), 64);
1550       if (SrcRegs[I] != DestRegs[I]) {
1551         UsedMask[I] = true;
1552         EmitAndCountInstruction(
1553             MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
1554       } else {
1555         emitX86Nops(*OutStreamer, 4, Subtarget);
1556       }
1557     }
1558 
1559   // Now that the register values are stashed, mov arguments into place.
1560   // FIXME: This doesn't work if one of the later SrcRegs is equal to an
1561   // earlier DestReg. We will have already overwritten over the register before
1562   // we can copy from it.
1563   for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1564     if (SrcRegs[I] != DestRegs[I])
1565       EmitAndCountInstruction(
1566           MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));
1567 
1568   // We emit a hard dependency on the __xray_CustomEvent symbol, which is the
1569   // name of the trampoline to be implemented by the XRay runtime.
1570   auto TSym = OutContext.getOrCreateSymbol("__xray_CustomEvent");
1571   MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
1572   if (isPositionIndependent())
1573     TOp.setTargetFlags(X86II::MO_PLT);
1574 
1575   // Emit the call instruction.
1576   EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
1577                               .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));
1578 
1579   // Restore caller-saved and used registers.
1580   for (unsigned I = sizeof UsedMask; I-- > 0;)
1581     if (UsedMask[I])
1582       EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
1583     else
1584       emitX86Nops(*OutStreamer, 1, Subtarget);
1585 
1586   OutStreamer->AddComment("xray custom event end.");
1587 
1588   // Record the sled version. Version 0 of this sled was spelled differently, so
1589   // we let the runtime handle the different offsets we're using. Version 2
1590   // changed the absolute address to a PC-relative address.
1591   recordSled(CurSled, MI, SledKind::CUSTOM_EVENT, 2);
1592 }
1593 
1594 void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
1595                                                     X86MCInstLower &MCIL) {
1596   assert(Subtarget->is64Bit() && "XRay typed events only supports X86-64");
1597 
1598   NoAutoPaddingScope NoPadScope(*OutStreamer);
1599 
1600   // We want to emit the following pattern, which follows the x86 calling
1601   // convention to prepare for the trampoline call to be patched in.
1602   //
1603   //   .p2align 1, ...
1604   // .Lxray_event_sled_N:
1605   //   jmp +N                        // jump across the instrumentation sled
1606   //   ...                           // set up arguments in register
1607   //   callq __xray_TypedEvent@plt  // force dependency to symbol
1608   //   ...
1609   //   <jump here>
1610   //
1611   // After patching, it would look something like:
1612   //
1613   //   nopw (2-byte nop)
1614   //   ...
1615   //   callq __xrayTypedEvent  // already lowered
1616   //   ...
1617   //
1618   // ---
1619   // First we emit the label and the jump.
1620   auto CurSled = OutContext.createTempSymbol("xray_typed_event_sled_", true);
1621   OutStreamer->AddComment("# XRay Typed Event Log");
1622   OutStreamer->emitCodeAlignment(2, &getSubtargetInfo());
1623   OutStreamer->emitLabel(CurSled);
1624 
1625   // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1626   // an operand (computed as an offset from the jmp instruction).
1627   // FIXME: Find another less hacky way do force the relative jump.
1628   OutStreamer->emitBinaryData("\xeb\x14");
1629 
1630   // An x86-64 convention may place three arguments into %rcx, %rdx, and R8,
1631   // so we'll work with those. Or we may be called via SystemV, in which case
1632   // we don't have to do any translation.
1633   const Register DestRegs[] = {X86::RDI, X86::RSI, X86::RDX};
1634   bool UsedMask[] = {false, false, false};
1635 
1636   // Will fill out src regs in the loop.
1637   Register SrcRegs[] = {0, 0, 0};
1638 
1639   // Then we put the operands in the SystemV registers. We spill the values in
1640   // the registers before we clobber them, and mark them as used in UsedMask.
1641   // In case the arguments are already in the correct register, we emit nops
1642   // appropriately sized to keep the sled the same size in every situation.
1643   for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1644     if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
1645       // TODO: Is register only support adequate?
1646       assert(Op->isReg() && "Only supports arguments in registers");
1647       SrcRegs[I] = getX86SubSuperRegister(Op->getReg(), 64);
1648       if (SrcRegs[I] != DestRegs[I]) {
1649         UsedMask[I] = true;
1650         EmitAndCountInstruction(
1651             MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
1652       } else {
1653         emitX86Nops(*OutStreamer, 4, Subtarget);
1654       }
1655     }
1656 
1657   // In the above loop we only stash all of the destination registers or emit
1658   // nops if the arguments are already in the right place. Doing the actually
1659   // moving is postponed until after all the registers are stashed so nothing
1660   // is clobbers. We've already added nops to account for the size of mov and
1661   // push if the register is in the right place, so we only have to worry about
1662   // emitting movs.
1663   // FIXME: This doesn't work if one of the later SrcRegs is equal to an
1664   // earlier DestReg. We will have already overwritten over the register before
1665   // we can copy from it.
1666   for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1667     if (UsedMask[I])
1668       EmitAndCountInstruction(
1669           MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));
1670 
1671   // We emit a hard dependency on the __xray_TypedEvent symbol, which is the
1672   // name of the trampoline to be implemented by the XRay runtime.
1673   auto TSym = OutContext.getOrCreateSymbol("__xray_TypedEvent");
1674   MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
1675   if (isPositionIndependent())
1676     TOp.setTargetFlags(X86II::MO_PLT);
1677 
1678   // Emit the call instruction.
1679   EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
1680                               .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));
1681 
1682   // Restore caller-saved and used registers.
1683   for (unsigned I = sizeof UsedMask; I-- > 0;)
1684     if (UsedMask[I])
1685       EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
1686     else
1687       emitX86Nops(*OutStreamer, 1, Subtarget);
1688 
1689   OutStreamer->AddComment("xray typed event end.");
1690 
1691   // Record the sled version.
1692   recordSled(CurSled, MI, SledKind::TYPED_EVENT, 2);
1693 }
1694 
1695 void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
1696                                                   X86MCInstLower &MCIL) {
1697 
1698   NoAutoPaddingScope NoPadScope(*OutStreamer);
1699 
1700   const Function &F = MF->getFunction();
1701   if (F.hasFnAttribute("patchable-function-entry")) {
1702     unsigned Num;
1703     if (F.getFnAttribute("patchable-function-entry")
1704             .getValueAsString()
1705             .getAsInteger(10, Num))
1706       return;
1707     emitX86Nops(*OutStreamer, Num, Subtarget);
1708     return;
1709   }
1710   // We want to emit the following pattern:
1711   //
1712   //   .p2align 1, ...
1713   // .Lxray_sled_N:
1714   //   jmp .tmpN
1715   //   # 9 bytes worth of noops
1716   //
1717   // We need the 9 bytes because at runtime, we'd be patching over the full 11
1718   // bytes with the following pattern:
1719   //
1720   //   mov %r10, <function id, 32-bit>   // 6 bytes
1721   //   call <relative offset, 32-bits>   // 5 bytes
1722   //
1723   auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
1724   OutStreamer->emitCodeAlignment(2, &getSubtargetInfo());
1725   OutStreamer->emitLabel(CurSled);
1726 
1727   // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1728   // an operand (computed as an offset from the jmp instruction).
1729   // FIXME: Find another less hacky way do force the relative jump.
1730   OutStreamer->emitBytes("\xeb\x09");
1731   emitX86Nops(*OutStreamer, 9, Subtarget);
1732   recordSled(CurSled, MI, SledKind::FUNCTION_ENTER, 2);
1733 }
1734 
1735 void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
1736                                        X86MCInstLower &MCIL) {
1737   NoAutoPaddingScope NoPadScope(*OutStreamer);
1738 
1739   // Since PATCHABLE_RET takes the opcode of the return statement as an
1740   // argument, we use that to emit the correct form of the RET that we want.
1741   // i.e. when we see this:
1742   //
1743   //   PATCHABLE_RET X86::RET ...
1744   //
1745   // We should emit the RET followed by sleds.
1746   //
1747   //   .p2align 1, ...
1748   // .Lxray_sled_N:
1749   //   ret  # or equivalent instruction
1750   //   # 10 bytes worth of noops
1751   //
1752   // This just makes sure that the alignment for the next instruction is 2.
1753   auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
1754   OutStreamer->emitCodeAlignment(2, &getSubtargetInfo());
1755   OutStreamer->emitLabel(CurSled);
1756   unsigned OpCode = MI.getOperand(0).getImm();
1757   MCInst Ret;
1758   Ret.setOpcode(OpCode);
1759   for (auto &MO : drop_begin(MI.operands()))
1760     if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
1761       Ret.addOperand(*MaybeOperand);
1762   OutStreamer->emitInstruction(Ret, getSubtargetInfo());
1763   emitX86Nops(*OutStreamer, 10, Subtarget);
1764   recordSled(CurSled, MI, SledKind::FUNCTION_EXIT, 2);
1765 }
1766 
1767 void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
1768                                              X86MCInstLower &MCIL) {
1769   NoAutoPaddingScope NoPadScope(*OutStreamer);
1770 
1771   // Like PATCHABLE_RET, we have the actual instruction in the operands to this
1772   // instruction so we lower that particular instruction and its operands.
1773   // Unlike PATCHABLE_RET though, we put the sled before the JMP, much like how
1774   // we do it for PATCHABLE_FUNCTION_ENTER. The sled should be very similar to
1775   // the PATCHABLE_FUNCTION_ENTER case, followed by the lowering of the actual
1776   // tail call much like how we have it in PATCHABLE_RET.
1777   auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
1778   OutStreamer->emitCodeAlignment(2, &getSubtargetInfo());
1779   OutStreamer->emitLabel(CurSled);
1780   auto Target = OutContext.createTempSymbol();
1781 
1782   // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1783   // an operand (computed as an offset from the jmp instruction).
1784   // FIXME: Find another less hacky way do force the relative jump.
1785   OutStreamer->emitBytes("\xeb\x09");
1786   emitX86Nops(*OutStreamer, 9, Subtarget);
1787   OutStreamer->emitLabel(Target);
1788   recordSled(CurSled, MI, SledKind::TAIL_CALL, 2);
1789 
1790   unsigned OpCode = MI.getOperand(0).getImm();
1791   OpCode = convertTailJumpOpcode(OpCode);
1792   MCInst TC;
1793   TC.setOpcode(OpCode);
1794 
1795   // Before emitting the instruction, add a comment to indicate that this is
1796   // indeed a tail call.
1797   OutStreamer->AddComment("TAILCALL");
1798   for (auto &MO : drop_begin(MI.operands()))
1799     if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
1800       TC.addOperand(*MaybeOperand);
1801   OutStreamer->emitInstruction(TC, getSubtargetInfo());
1802 }
1803 
1804 // Returns instruction preceding MBBI in MachineFunction.
1805 // If MBBI is the first instruction of the first basic block, returns null.
1806 static MachineBasicBlock::const_iterator
1807 PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI) {
1808   const MachineBasicBlock *MBB = MBBI->getParent();
1809   while (MBBI == MBB->begin()) {
1810     if (MBB == &MBB->getParent()->front())
1811       return MachineBasicBlock::const_iterator();
1812     MBB = MBB->getPrevNode();
1813     MBBI = MBB->end();
1814   }
1815   --MBBI;
1816   return MBBI;
1817 }
1818 
1819 static const Constant *getConstantFromPool(const MachineInstr &MI,
1820                                            const MachineOperand &Op) {
1821   if (!Op.isCPI() || Op.getOffset() != 0)
1822     return nullptr;
1823 
1824   ArrayRef<MachineConstantPoolEntry> Constants =
1825       MI.getParent()->getParent()->getConstantPool()->getConstants();
1826   const MachineConstantPoolEntry &ConstantEntry = Constants[Op.getIndex()];
1827 
1828   // Bail if this is a machine constant pool entry, we won't be able to dig out
1829   // anything useful.
1830   if (ConstantEntry.isMachineConstantPoolEntry())
1831     return nullptr;
1832 
1833   return ConstantEntry.Val.ConstVal;
1834 }
1835 
1836 static std::string getShuffleComment(const MachineInstr *MI, unsigned SrcOp1Idx,
1837                                      unsigned SrcOp2Idx, ArrayRef<int> Mask) {
1838   std::string Comment;
1839 
1840   // Compute the name for a register. This is really goofy because we have
1841   // multiple instruction printers that could (in theory) use different
1842   // names. Fortunately most people use the ATT style (outside of Windows)
1843   // and they actually agree on register naming here. Ultimately, this is
1844   // a comment, and so its OK if it isn't perfect.
1845   auto GetRegisterName = [](unsigned RegNum) -> StringRef {
1846     return X86ATTInstPrinter::getRegisterName(RegNum);
1847   };
1848 
1849   const MachineOperand &DstOp = MI->getOperand(0);
1850   const MachineOperand &SrcOp1 = MI->getOperand(SrcOp1Idx);
1851   const MachineOperand &SrcOp2 = MI->getOperand(SrcOp2Idx);
1852 
1853   StringRef DstName = DstOp.isReg() ? GetRegisterName(DstOp.getReg()) : "mem";
1854   StringRef Src1Name =
1855       SrcOp1.isReg() ? GetRegisterName(SrcOp1.getReg()) : "mem";
1856   StringRef Src2Name =
1857       SrcOp2.isReg() ? GetRegisterName(SrcOp2.getReg()) : "mem";
1858 
1859   // One source operand, fix the mask to print all elements in one span.
1860   SmallVector<int, 8> ShuffleMask(Mask.begin(), Mask.end());
1861   if (Src1Name == Src2Name)
1862     for (int i = 0, e = ShuffleMask.size(); i != e; ++i)
1863       if (ShuffleMask[i] >= e)
1864         ShuffleMask[i] -= e;
1865 
1866   raw_string_ostream CS(Comment);
1867   CS << DstName;
1868 
1869   // Handle AVX512 MASK/MASXZ write mask comments.
1870   // MASK: zmmX {%kY}
1871   // MASKZ: zmmX {%kY} {z}
1872   if (SrcOp1Idx > 1) {
1873     assert((SrcOp1Idx == 2 || SrcOp1Idx == 3) && "Unexpected writemask");
1874 
1875     const MachineOperand &WriteMaskOp = MI->getOperand(SrcOp1Idx - 1);
1876     if (WriteMaskOp.isReg()) {
1877       CS << " {%" << GetRegisterName(WriteMaskOp.getReg()) << "}";
1878 
1879       if (SrcOp1Idx == 2) {
1880         CS << " {z}";
1881       }
1882     }
1883   }
1884 
1885   CS << " = ";
1886 
1887   for (int i = 0, e = ShuffleMask.size(); i != e; ++i) {
1888     if (i != 0)
1889       CS << ",";
1890     if (ShuffleMask[i] == SM_SentinelZero) {
1891       CS << "zero";
1892       continue;
1893     }
1894 
1895     // Otherwise, it must come from src1 or src2.  Print the span of elements
1896     // that comes from this src.
1897     bool isSrc1 = ShuffleMask[i] < (int)e;
1898     CS << (isSrc1 ? Src1Name : Src2Name) << '[';
1899 
1900     bool IsFirst = true;
1901     while (i != e && ShuffleMask[i] != SM_SentinelZero &&
1902            (ShuffleMask[i] < (int)e) == isSrc1) {
1903       if (!IsFirst)
1904         CS << ',';
1905       else
1906         IsFirst = false;
1907       if (ShuffleMask[i] == SM_SentinelUndef)
1908         CS << "u";
1909       else
1910         CS << ShuffleMask[i] % (int)e;
1911       ++i;
1912     }
1913     CS << ']';
1914     --i; // For loop increments element #.
1915   }
1916   CS.flush();
1917 
1918   return Comment;
1919 }
1920 
1921 static void printConstant(const APInt &Val, raw_ostream &CS) {
1922   if (Val.getBitWidth() <= 64) {
1923     CS << Val.getZExtValue();
1924   } else {
1925     // print multi-word constant as (w0,w1)
1926     CS << "(";
1927     for (int i = 0, N = Val.getNumWords(); i < N; ++i) {
1928       if (i > 0)
1929         CS << ",";
1930       CS << Val.getRawData()[i];
1931     }
1932     CS << ")";
1933   }
1934 }
1935 
1936 static void printConstant(const APFloat &Flt, raw_ostream &CS) {
1937   SmallString<32> Str;
1938   // Force scientific notation to distinquish from integers.
1939   Flt.toString(Str, 0, 0);
1940   CS << Str;
1941 }
1942 
1943 static void printConstant(const Constant *COp, raw_ostream &CS) {
1944   if (isa<UndefValue>(COp)) {
1945     CS << "u";
1946   } else if (auto *CI = dyn_cast<ConstantInt>(COp)) {
1947     printConstant(CI->getValue(), CS);
1948   } else if (auto *CF = dyn_cast<ConstantFP>(COp)) {
1949     printConstant(CF->getValueAPF(), CS);
1950   } else {
1951     CS << "?";
1952   }
1953 }
1954 
1955 void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) {
1956   assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
1957   assert(getSubtarget().isOSWindows() && "SEH_ instruction Windows only");
1958 
1959   // Use the .cv_fpo directives if we're emitting CodeView on 32-bit x86.
1960   if (EmitFPOData) {
1961     X86TargetStreamer *XTS =
1962         static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer());
1963     switch (MI->getOpcode()) {
1964     case X86::SEH_PushReg:
1965       XTS->emitFPOPushReg(MI->getOperand(0).getImm());
1966       break;
1967     case X86::SEH_StackAlloc:
1968       XTS->emitFPOStackAlloc(MI->getOperand(0).getImm());
1969       break;
1970     case X86::SEH_StackAlign:
1971       XTS->emitFPOStackAlign(MI->getOperand(0).getImm());
1972       break;
1973     case X86::SEH_SetFrame:
1974       assert(MI->getOperand(1).getImm() == 0 &&
1975              ".cv_fpo_setframe takes no offset");
1976       XTS->emitFPOSetFrame(MI->getOperand(0).getImm());
1977       break;
1978     case X86::SEH_EndPrologue:
1979       XTS->emitFPOEndPrologue();
1980       break;
1981     case X86::SEH_SaveReg:
1982     case X86::SEH_SaveXMM:
1983     case X86::SEH_PushFrame:
1984       llvm_unreachable("SEH_ directive incompatible with FPO");
1985       break;
1986     default:
1987       llvm_unreachable("expected SEH_ instruction");
1988     }
1989     return;
1990   }
1991 
1992   // Otherwise, use the .seh_ directives for all other Windows platforms.
1993   switch (MI->getOpcode()) {
1994   case X86::SEH_PushReg:
1995     OutStreamer->emitWinCFIPushReg(MI->getOperand(0).getImm());
1996     break;
1997 
1998   case X86::SEH_SaveReg:
1999     OutStreamer->emitWinCFISaveReg(MI->getOperand(0).getImm(),
2000                                    MI->getOperand(1).getImm());
2001     break;
2002 
2003   case X86::SEH_SaveXMM:
2004     OutStreamer->emitWinCFISaveXMM(MI->getOperand(0).getImm(),
2005                                    MI->getOperand(1).getImm());
2006     break;
2007 
2008   case X86::SEH_StackAlloc:
2009     OutStreamer->emitWinCFIAllocStack(MI->getOperand(0).getImm());
2010     break;
2011 
2012   case X86::SEH_SetFrame:
2013     OutStreamer->emitWinCFISetFrame(MI->getOperand(0).getImm(),
2014                                     MI->getOperand(1).getImm());
2015     break;
2016 
2017   case X86::SEH_PushFrame:
2018     OutStreamer->emitWinCFIPushFrame(MI->getOperand(0).getImm());
2019     break;
2020 
2021   case X86::SEH_EndPrologue:
2022     OutStreamer->emitWinCFIEndProlog();
2023     break;
2024 
2025   default:
2026     llvm_unreachable("expected SEH_ instruction");
2027   }
2028 }
2029 
2030 static unsigned getRegisterWidth(const MCOperandInfo &Info) {
2031   if (Info.RegClass == X86::VR128RegClassID ||
2032       Info.RegClass == X86::VR128XRegClassID)
2033     return 128;
2034   if (Info.RegClass == X86::VR256RegClassID ||
2035       Info.RegClass == X86::VR256XRegClassID)
2036     return 256;
2037   if (Info.RegClass == X86::VR512RegClassID)
2038     return 512;
2039   llvm_unreachable("Unknown register class!");
2040 }
2041 
2042 static void addConstantComments(const MachineInstr *MI,
2043                                 MCStreamer &OutStreamer) {
2044   switch (MI->getOpcode()) {
2045   // Lower PSHUFB and VPERMILP normally but add a comment if we can find
2046   // a constant shuffle mask. We won't be able to do this at the MC layer
2047   // because the mask isn't an immediate.
2048   case X86::PSHUFBrm:
2049   case X86::VPSHUFBrm:
2050   case X86::VPSHUFBYrm:
2051   case X86::VPSHUFBZ128rm:
2052   case X86::VPSHUFBZ128rmk:
2053   case X86::VPSHUFBZ128rmkz:
2054   case X86::VPSHUFBZ256rm:
2055   case X86::VPSHUFBZ256rmk:
2056   case X86::VPSHUFBZ256rmkz:
2057   case X86::VPSHUFBZrm:
2058   case X86::VPSHUFBZrmk:
2059   case X86::VPSHUFBZrmkz: {
2060     unsigned SrcIdx = 1;
2061     if (X86II::isKMasked(MI->getDesc().TSFlags)) {
2062       // Skip mask operand.
2063       ++SrcIdx;
2064       if (X86II::isKMergeMasked(MI->getDesc().TSFlags)) {
2065         // Skip passthru operand.
2066         ++SrcIdx;
2067       }
2068     }
2069     unsigned MaskIdx = SrcIdx + 1 + X86::AddrDisp;
2070 
2071     assert(MI->getNumOperands() >= (SrcIdx + 1 + X86::AddrNumOperands) &&
2072            "Unexpected number of operands!");
2073 
2074     const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
2075     if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2076       unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
2077       SmallVector<int, 64> Mask;
2078       DecodePSHUFBMask(C, Width, Mask);
2079       if (!Mask.empty())
2080         OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
2081     }
2082     break;
2083   }
2084 
2085   case X86::VPERMILPSrm:
2086   case X86::VPERMILPSYrm:
2087   case X86::VPERMILPSZ128rm:
2088   case X86::VPERMILPSZ128rmk:
2089   case X86::VPERMILPSZ128rmkz:
2090   case X86::VPERMILPSZ256rm:
2091   case X86::VPERMILPSZ256rmk:
2092   case X86::VPERMILPSZ256rmkz:
2093   case X86::VPERMILPSZrm:
2094   case X86::VPERMILPSZrmk:
2095   case X86::VPERMILPSZrmkz:
2096   case X86::VPERMILPDrm:
2097   case X86::VPERMILPDYrm:
2098   case X86::VPERMILPDZ128rm:
2099   case X86::VPERMILPDZ128rmk:
2100   case X86::VPERMILPDZ128rmkz:
2101   case X86::VPERMILPDZ256rm:
2102   case X86::VPERMILPDZ256rmk:
2103   case X86::VPERMILPDZ256rmkz:
2104   case X86::VPERMILPDZrm:
2105   case X86::VPERMILPDZrmk:
2106   case X86::VPERMILPDZrmkz: {
2107     unsigned ElSize;
2108     switch (MI->getOpcode()) {
2109     default: llvm_unreachable("Invalid opcode");
2110     case X86::VPERMILPSrm:
2111     case X86::VPERMILPSYrm:
2112     case X86::VPERMILPSZ128rm:
2113     case X86::VPERMILPSZ256rm:
2114     case X86::VPERMILPSZrm:
2115     case X86::VPERMILPSZ128rmkz:
2116     case X86::VPERMILPSZ256rmkz:
2117     case X86::VPERMILPSZrmkz:
2118     case X86::VPERMILPSZ128rmk:
2119     case X86::VPERMILPSZ256rmk:
2120     case X86::VPERMILPSZrmk:
2121       ElSize = 32;
2122       break;
2123     case X86::VPERMILPDrm:
2124     case X86::VPERMILPDYrm:
2125     case X86::VPERMILPDZ128rm:
2126     case X86::VPERMILPDZ256rm:
2127     case X86::VPERMILPDZrm:
2128     case X86::VPERMILPDZ128rmkz:
2129     case X86::VPERMILPDZ256rmkz:
2130     case X86::VPERMILPDZrmkz:
2131     case X86::VPERMILPDZ128rmk:
2132     case X86::VPERMILPDZ256rmk:
2133     case X86::VPERMILPDZrmk:
2134       ElSize = 64;
2135       break;
2136     }
2137 
2138     unsigned SrcIdx = 1;
2139     if (X86II::isKMasked(MI->getDesc().TSFlags)) {
2140       // Skip mask operand.
2141       ++SrcIdx;
2142       if (X86II::isKMergeMasked(MI->getDesc().TSFlags)) {
2143         // Skip passthru operand.
2144         ++SrcIdx;
2145       }
2146     }
2147     unsigned MaskIdx = SrcIdx + 1 + X86::AddrDisp;
2148 
2149     assert(MI->getNumOperands() >= (SrcIdx + 1 + X86::AddrNumOperands) &&
2150            "Unexpected number of operands!");
2151 
2152     const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
2153     if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2154       unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
2155       SmallVector<int, 16> Mask;
2156       DecodeVPERMILPMask(C, ElSize, Width, Mask);
2157       if (!Mask.empty())
2158         OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
2159     }
2160     break;
2161   }
2162 
2163   case X86::VPERMIL2PDrm:
2164   case X86::VPERMIL2PSrm:
2165   case X86::VPERMIL2PDYrm:
2166   case X86::VPERMIL2PSYrm: {
2167     assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands + 1) &&
2168            "Unexpected number of operands!");
2169 
2170     const MachineOperand &CtrlOp = MI->getOperand(MI->getNumOperands() - 1);
2171     if (!CtrlOp.isImm())
2172       break;
2173 
2174     unsigned ElSize;
2175     switch (MI->getOpcode()) {
2176     default: llvm_unreachable("Invalid opcode");
2177     case X86::VPERMIL2PSrm: case X86::VPERMIL2PSYrm: ElSize = 32; break;
2178     case X86::VPERMIL2PDrm: case X86::VPERMIL2PDYrm: ElSize = 64; break;
2179     }
2180 
2181     const MachineOperand &MaskOp = MI->getOperand(3 + X86::AddrDisp);
2182     if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2183       unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
2184       SmallVector<int, 16> Mask;
2185       DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Width, Mask);
2186       if (!Mask.empty())
2187         OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask));
2188     }
2189     break;
2190   }
2191 
2192   case X86::VPPERMrrm: {
2193     assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands) &&
2194            "Unexpected number of operands!");
2195 
2196     const MachineOperand &MaskOp = MI->getOperand(3 + X86::AddrDisp);
2197     if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2198       unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
2199       SmallVector<int, 16> Mask;
2200       DecodeVPPERMMask(C, Width, Mask);
2201       if (!Mask.empty())
2202         OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask));
2203     }
2204     break;
2205   }
2206 
2207   case X86::MMX_MOVQ64rm: {
2208     assert(MI->getNumOperands() == (1 + X86::AddrNumOperands) &&
2209            "Unexpected number of operands!");
2210     if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
2211       std::string Comment;
2212       raw_string_ostream CS(Comment);
2213       const MachineOperand &DstOp = MI->getOperand(0);
2214       CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
2215       if (auto *CF = dyn_cast<ConstantFP>(C)) {
2216         CS << "0x" << toString(CF->getValueAPF().bitcastToAPInt(), 16, false);
2217         OutStreamer.AddComment(CS.str());
2218       }
2219     }
2220     break;
2221   }
2222 
2223 #define MOV_CASE(Prefix, Suffix)                                               \
2224   case X86::Prefix##MOVAPD##Suffix##rm:                                        \
2225   case X86::Prefix##MOVAPS##Suffix##rm:                                        \
2226   case X86::Prefix##MOVUPD##Suffix##rm:                                        \
2227   case X86::Prefix##MOVUPS##Suffix##rm:                                        \
2228   case X86::Prefix##MOVDQA##Suffix##rm:                                        \
2229   case X86::Prefix##MOVDQU##Suffix##rm:
2230 
2231 #define MOV_AVX512_CASE(Suffix)                                                \
2232   case X86::VMOVDQA64##Suffix##rm:                                             \
2233   case X86::VMOVDQA32##Suffix##rm:                                             \
2234   case X86::VMOVDQU64##Suffix##rm:                                             \
2235   case X86::VMOVDQU32##Suffix##rm:                                             \
2236   case X86::VMOVDQU16##Suffix##rm:                                             \
2237   case X86::VMOVDQU8##Suffix##rm:                                              \
2238   case X86::VMOVAPS##Suffix##rm:                                               \
2239   case X86::VMOVAPD##Suffix##rm:                                               \
2240   case X86::VMOVUPS##Suffix##rm:                                               \
2241   case X86::VMOVUPD##Suffix##rm:
2242 
2243 #define CASE_ALL_MOV_RM()                                                      \
2244   MOV_CASE(, )   /* SSE */                                                     \
2245   MOV_CASE(V, )  /* AVX-128 */                                                 \
2246   MOV_CASE(V, Y) /* AVX-256 */                                                 \
2247   MOV_AVX512_CASE(Z)                                                           \
2248   MOV_AVX512_CASE(Z256)                                                        \
2249   MOV_AVX512_CASE(Z128)
2250 
2251     // For loads from a constant pool to a vector register, print the constant
2252     // loaded.
2253     CASE_ALL_MOV_RM()
2254   case X86::VBROADCASTF128:
2255   case X86::VBROADCASTI128:
2256   case X86::VBROADCASTF32X4Z256rm:
2257   case X86::VBROADCASTF32X4rm:
2258   case X86::VBROADCASTF32X8rm:
2259   case X86::VBROADCASTF64X2Z128rm:
2260   case X86::VBROADCASTF64X2rm:
2261   case X86::VBROADCASTF64X4rm:
2262   case X86::VBROADCASTI32X4Z256rm:
2263   case X86::VBROADCASTI32X4rm:
2264   case X86::VBROADCASTI32X8rm:
2265   case X86::VBROADCASTI64X2Z128rm:
2266   case X86::VBROADCASTI64X2rm:
2267   case X86::VBROADCASTI64X4rm:
2268     assert(MI->getNumOperands() >= (1 + X86::AddrNumOperands) &&
2269            "Unexpected number of operands!");
2270     if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
2271       int NumLanes = 1;
2272       // Override NumLanes for the broadcast instructions.
2273       switch (MI->getOpcode()) {
2274       case X86::VBROADCASTF128:        NumLanes = 2; break;
2275       case X86::VBROADCASTI128:        NumLanes = 2; break;
2276       case X86::VBROADCASTF32X4Z256rm: NumLanes = 2; break;
2277       case X86::VBROADCASTF32X4rm:     NumLanes = 4; break;
2278       case X86::VBROADCASTF32X8rm:     NumLanes = 2; break;
2279       case X86::VBROADCASTF64X2Z128rm: NumLanes = 2; break;
2280       case X86::VBROADCASTF64X2rm:     NumLanes = 4; break;
2281       case X86::VBROADCASTF64X4rm:     NumLanes = 2; break;
2282       case X86::VBROADCASTI32X4Z256rm: NumLanes = 2; break;
2283       case X86::VBROADCASTI32X4rm:     NumLanes = 4; break;
2284       case X86::VBROADCASTI32X8rm:     NumLanes = 2; break;
2285       case X86::VBROADCASTI64X2Z128rm: NumLanes = 2; break;
2286       case X86::VBROADCASTI64X2rm:     NumLanes = 4; break;
2287       case X86::VBROADCASTI64X4rm:     NumLanes = 2; break;
2288       }
2289 
2290       std::string Comment;
2291       raw_string_ostream CS(Comment);
2292       const MachineOperand &DstOp = MI->getOperand(0);
2293       CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
2294       if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
2295         CS << "[";
2296         for (int l = 0; l != NumLanes; ++l) {
2297           for (int i = 0, NumElements = CDS->getNumElements(); i < NumElements;
2298                ++i) {
2299             if (i != 0 || l != 0)
2300               CS << ",";
2301             if (CDS->getElementType()->isIntegerTy())
2302               printConstant(CDS->getElementAsAPInt(i), CS);
2303             else if (CDS->getElementType()->isHalfTy() ||
2304                      CDS->getElementType()->isFloatTy() ||
2305                      CDS->getElementType()->isDoubleTy())
2306               printConstant(CDS->getElementAsAPFloat(i), CS);
2307             else
2308               CS << "?";
2309           }
2310         }
2311         CS << "]";
2312         OutStreamer.AddComment(CS.str());
2313       } else if (auto *CV = dyn_cast<ConstantVector>(C)) {
2314         CS << "<";
2315         for (int l = 0; l != NumLanes; ++l) {
2316           for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands;
2317                ++i) {
2318             if (i != 0 || l != 0)
2319               CS << ",";
2320             printConstant(CV->getOperand(i), CS);
2321           }
2322         }
2323         CS << ">";
2324         OutStreamer.AddComment(CS.str());
2325       }
2326     }
2327     break;
2328 
2329   case X86::MOVDDUPrm:
2330   case X86::VMOVDDUPrm:
2331   case X86::VMOVDDUPZ128rm:
2332   case X86::VBROADCASTSSrm:
2333   case X86::VBROADCASTSSYrm:
2334   case X86::VBROADCASTSSZ128rm:
2335   case X86::VBROADCASTSSZ256rm:
2336   case X86::VBROADCASTSSZrm:
2337   case X86::VBROADCASTSDYrm:
2338   case X86::VBROADCASTSDZ256rm:
2339   case X86::VBROADCASTSDZrm:
2340   case X86::VPBROADCASTBrm:
2341   case X86::VPBROADCASTBYrm:
2342   case X86::VPBROADCASTBZ128rm:
2343   case X86::VPBROADCASTBZ256rm:
2344   case X86::VPBROADCASTBZrm:
2345   case X86::VPBROADCASTDrm:
2346   case X86::VPBROADCASTDYrm:
2347   case X86::VPBROADCASTDZ128rm:
2348   case X86::VPBROADCASTDZ256rm:
2349   case X86::VPBROADCASTDZrm:
2350   case X86::VPBROADCASTQrm:
2351   case X86::VPBROADCASTQYrm:
2352   case X86::VPBROADCASTQZ128rm:
2353   case X86::VPBROADCASTQZ256rm:
2354   case X86::VPBROADCASTQZrm:
2355   case X86::VPBROADCASTWrm:
2356   case X86::VPBROADCASTWYrm:
2357   case X86::VPBROADCASTWZ128rm:
2358   case X86::VPBROADCASTWZ256rm:
2359   case X86::VPBROADCASTWZrm:
2360     assert(MI->getNumOperands() >= (1 + X86::AddrNumOperands) &&
2361            "Unexpected number of operands!");
2362     if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
2363       int NumElts;
2364       switch (MI->getOpcode()) {
2365       default: llvm_unreachable("Invalid opcode");
2366       case X86::MOVDDUPrm:          NumElts = 2;  break;
2367       case X86::VMOVDDUPrm:         NumElts = 2;  break;
2368       case X86::VMOVDDUPZ128rm:     NumElts = 2;  break;
2369       case X86::VBROADCASTSSrm:     NumElts = 4;  break;
2370       case X86::VBROADCASTSSYrm:    NumElts = 8;  break;
2371       case X86::VBROADCASTSSZ128rm: NumElts = 4;  break;
2372       case X86::VBROADCASTSSZ256rm: NumElts = 8;  break;
2373       case X86::VBROADCASTSSZrm:    NumElts = 16; break;
2374       case X86::VBROADCASTSDYrm:    NumElts = 4;  break;
2375       case X86::VBROADCASTSDZ256rm: NumElts = 4;  break;
2376       case X86::VBROADCASTSDZrm:    NumElts = 8;  break;
2377       case X86::VPBROADCASTBrm:     NumElts = 16; break;
2378       case X86::VPBROADCASTBYrm:    NumElts = 32; break;
2379       case X86::VPBROADCASTBZ128rm: NumElts = 16; break;
2380       case X86::VPBROADCASTBZ256rm: NumElts = 32; break;
2381       case X86::VPBROADCASTBZrm:    NumElts = 64; break;
2382       case X86::VPBROADCASTDrm:     NumElts = 4;  break;
2383       case X86::VPBROADCASTDYrm:    NumElts = 8;  break;
2384       case X86::VPBROADCASTDZ128rm: NumElts = 4;  break;
2385       case X86::VPBROADCASTDZ256rm: NumElts = 8;  break;
2386       case X86::VPBROADCASTDZrm:    NumElts = 16; break;
2387       case X86::VPBROADCASTQrm:     NumElts = 2;  break;
2388       case X86::VPBROADCASTQYrm:    NumElts = 4;  break;
2389       case X86::VPBROADCASTQZ128rm: NumElts = 2;  break;
2390       case X86::VPBROADCASTQZ256rm: NumElts = 4;  break;
2391       case X86::VPBROADCASTQZrm:    NumElts = 8;  break;
2392       case X86::VPBROADCASTWrm:     NumElts = 8;  break;
2393       case X86::VPBROADCASTWYrm:    NumElts = 16; break;
2394       case X86::VPBROADCASTWZ128rm: NumElts = 8;  break;
2395       case X86::VPBROADCASTWZ256rm: NumElts = 16; break;
2396       case X86::VPBROADCASTWZrm:    NumElts = 32; break;
2397       }
2398 
2399       std::string Comment;
2400       raw_string_ostream CS(Comment);
2401       const MachineOperand &DstOp = MI->getOperand(0);
2402       CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
2403       CS << "[";
2404       for (int i = 0; i != NumElts; ++i) {
2405         if (i != 0)
2406           CS << ",";
2407         printConstant(C, CS);
2408       }
2409       CS << "]";
2410       OutStreamer.AddComment(CS.str());
2411     }
2412   }
2413 }
2414 
// Emit the machine instruction \p MI to OutStreamer.
//
// The function first performs some per-instruction bookkeeping (Swift async
// symbol tracking, optional encoding/constant-pool comments) and then
// dispatches on the opcode:
//   * cases that `return` handle the instruction completely themselves
//     (pseudo expansions, target-independent pseudos delegated to the
//     Lower* helpers, SEH directives, ...);
//   * cases that `break` only attach a comment (or decide that no special
//     handling is needed) and fall out of the switch into the generic path,
//     which lowers MI to a single MCInst and emits it.
void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
  // FIXME: Enable feature predicate checks once all the test pass.
  // X86_MC::verifyInstructionPredicates(MI->getOpcode(),
  //                                     Subtarget->getFeatureBits());

  X86MCInstLower MCInstLowering(*MF, *this);
  // Only consulted by the MOVPC32r expansion below (for the stack slot size).
  const X86RegisterInfo *RI =
      MF->getSubtarget<X86Subtarget>().getRegisterInfo();

  // Remember whether any OR64rm references the external symbol
  // "swift_async_extendedFramePointerFlags". This function only sets the
  // flag; NOTE(review): it is presumably consumed elsewhere in the printer to
  // emit a weak reference to that symbol -- confirm against the flag's users.
  if (MI->getOpcode() == X86::OR64rm) {
    for (auto &Opd : MI->operands()) {
      if (Opd.isSymbol() && StringRef(Opd.getSymbolName()) ==
                                "swift_async_extendedFramePointerFlags") {
        ShouldEmitWeakSwiftAsyncExtendedFramePointerFlags = true;
      }
    }
  }

  // Add a comment about EVEX-2-VEX compression for AVX-512 instrs that
  // are compressed from EVEX encoding to VEX encoding.
  if (TM.Options.MCOptions.ShowMCEncoding) {
    if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX)
      OutStreamer->AddComment("EVEX TO VEX Compression ", false);
  }

  // Add comments for values loaded from constant pool.
  if (OutStreamer->isVerboseAsm())
    addConstantComments(MI, *OutStreamer);

  switch (MI->getOpcode()) {
  case TargetOpcode::DBG_VALUE:
    llvm_unreachable("Should be handled target independently");

  // Emit nothing here but a comment if we can.
  case X86::Int_MemBarrier:
    OutStreamer->emitRawComment("MEMBARRIER");
    return;

  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    // Lower these as normal, but add some comments.
    Register Reg = MI->getOperand(0).getReg();
    OutStreamer->AddComment(StringRef("eh_return, addr: %") +
                            X86ATTInstPrinter::getRegisterName(Reg));
    break;
  }
  case X86::CLEANUPRET: {
    // Lower these as normal, but add some comments.
    OutStreamer->AddComment("CLEANUPRET");
    break;
  }

  case X86::CATCHRET: {
    // Lower these as normal, but add some comments.
    OutStreamer->AddComment("CATCHRET");
    break;
  }

  case X86::ENDBR32:
  case X86::ENDBR64: {
    // CurrentPatchableFunctionEntrySym can be CurrentFnBegin only for
    // -fpatchable-function-entry=N,0. The entry MBB is guaranteed to be
    // non-empty. If MI is the initial ENDBR, place the
    // __patchable_function_entries label after ENDBR.
    if (CurrentPatchableFunctionEntrySym &&
        CurrentPatchableFunctionEntrySym == CurrentFnBegin &&
        MI == &MF->front().front()) {
      // Emit the ENDBR itself first, then retarget the patchable-entry symbol
      // to a fresh temporary label placed immediately after it.
      MCInst Inst;
      MCInstLowering.Lower(MI, Inst);
      EmitAndCountInstruction(Inst);
      CurrentPatchableFunctionEntrySym = createTempSymbol("patch");
      OutStreamer->emitLabel(CurrentPatchableFunctionEntrySym);
      return;
    }
    // Any other ENDBR is emitted through the generic path.
    break;
  }

  case X86::TAILJMPr:
  case X86::TAILJMPm:
  case X86::TAILJMPd:
  case X86::TAILJMPd_CC:
  case X86::TAILJMPr64:
  case X86::TAILJMPm64:
  case X86::TAILJMPd64:
  case X86::TAILJMPd64_CC:
  case X86::TAILJMPr64_REX:
  case X86::TAILJMPm64_REX:
    // Lower these as normal, but add some comments.
    OutStreamer->AddComment("TAILCALL");
    break;

  case X86::TLS_addr32:
  case X86::TLS_addr64:
  case X86::TLS_addrX32:
  case X86::TLS_base_addr32:
  case X86::TLS_base_addr64:
  case X86::TLS_base_addrX32:
    return LowerTlsAddr(MCInstLowering, *MI);

  case X86::MOVPC32r: {
    // This is a pseudo op for a two instruction sequence with a label, which
    // looks like:
    //     call "L1$pb"
    // "L1$pb":
    //     popl %esi

    // Emit the call.
    MCSymbol *PICBase = MF->getPICBaseSymbol();
    // FIXME: We would like an efficient form for this, so we don't have to do a
    // lot of extra uniquing.
    EmitAndCountInstruction(
        MCInstBuilder(X86::CALLpcrel32)
            .addExpr(MCSymbolRefExpr::create(PICBase, OutContext)));

    const X86FrameLowering *FrameLowering =
        MF->getSubtarget<X86Subtarget>().getFrameLowering();
    bool hasFP = FrameLowering->hasFP(*MF);

    // TODO: This is needed only if we require precise CFA.
    // A DWARF frame counts as active when at least one frame info exists and
    // the most recent one has no End set yet.
    bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() &&
                               !OutStreamer->getDwarfFrameInfos().back().End;

    // Negative slot size; negated again below so the CFA offset is first
    // raised by one slot (after the call) and later lowered by one slot
    // (after the pop).
    int stackGrowth = -RI->getSlotSize();

    // The CFA adjustments are emitted only when a DWARF frame is active and
    // the function has no frame pointer. NOTE(review): presumably because
    // the CFA is frame-pointer based otherwise -- confirm.
    if (HasActiveDwarfFrame && !hasFP) {
      OutStreamer->emitCFIAdjustCfaOffset(-stackGrowth);
    }

    // Emit the label.
    OutStreamer->emitLabel(PICBase);

    // popl $reg
    EmitAndCountInstruction(
        MCInstBuilder(X86::POP32r).addReg(MI->getOperand(0).getReg()));

    if (HasActiveDwarfFrame && !hasFP) {
      OutStreamer->emitCFIAdjustCfaOffset(stackGrowth);
    }
    return;
  }

  case X86::ADD32ri: {
    // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri.
    // Every other ADD32ri goes through the generic path.
    if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS)
      break;

    // Okay, we have something like:
    //  EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL)

    // For this, we want to print something like:
    //   MYGLOBAL + (. - PICBASE)
    // However, we can't generate a ".", so just emit a new label here and refer
    // to it.
    MCSymbol *DotSym = OutContext.createTempSymbol();
    OutStreamer->emitLabel(DotSym);

    // Now that we have emitted the label, lower the complex operand expression.
    MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2));

    // Build (DotSym - PICBase) first, then OpSym + (DotSym - PICBase).
    const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext);
    const MCExpr *PICBase =
        MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext);
    DotExpr = MCBinaryExpr::createSub(DotExpr, PICBase, OutContext);

    DotExpr = MCBinaryExpr::createAdd(
        MCSymbolRefExpr::create(OpSym, OutContext), DotExpr, OutContext);

    // Re-emit the ADD32ri with the original register operands and the
    // expression above in place of the flagged operand.
    EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri)
                                .addReg(MI->getOperand(0).getReg())
                                .addReg(MI->getOperand(1).getReg())
                                .addExpr(DotExpr));
    return;
  }
  // Target-independent pseudo instructions: each is fully handled by its
  // dedicated Lower* helper.
  case TargetOpcode::STATEPOINT:
    return LowerSTATEPOINT(*MI, MCInstLowering);

  case TargetOpcode::FAULTING_OP:
    return LowerFAULTING_OP(*MI, MCInstLowering);

  case TargetOpcode::FENTRY_CALL:
    return LowerFENTRY_CALL(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_OP:
    return LowerPATCHABLE_OP(*MI, MCInstLowering);

  case TargetOpcode::STACKMAP:
    return LowerSTACKMAP(*MI);

  case TargetOpcode::PATCHPOINT:
    return LowerPATCHPOINT(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
    return LowerPATCHABLE_FUNCTION_ENTER(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_RET:
    return LowerPATCHABLE_RET(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_TAIL_CALL:
    return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_EVENT_CALL:
    return LowerPATCHABLE_EVENT_CALL(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
    return LowerPATCHABLE_TYPED_EVENT_CALL(*MI, MCInstLowering);

  case X86::MORESTACK_RET:
    // Emitted as a bare return instruction; the opcode is chosen by
    // getRetOpcode() for the current subtarget.
    EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
    return;

  case X86::ASAN_CHECK_MEMACCESS:
    return LowerASAN_CHECK_MEMACCESS(*MI);

  case X86::MORESTACK_RET_RESTORE_R10:
    // Return, then restore R10.
    // The MOV64rr (R10 <- RAX) is placed after the return in the output.
    EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
    EmitAndCountInstruction(
        MCInstBuilder(X86::MOV64rr).addReg(X86::R10).addReg(X86::RAX));
    return;

  case X86::SEH_PushReg:
  case X86::SEH_SaveReg:
  case X86::SEH_SaveXMM:
  case X86::SEH_StackAlloc:
  case X86::SEH_StackAlign:
  case X86::SEH_SetFrame:
  case X86::SEH_PushFrame:
  case X86::SEH_EndPrologue:
    EmitSEHInstruction(MI);
    return;

  case X86::SEH_Epilogue: {
    assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
    MachineBasicBlock::const_iterator MBBI(MI);
    // Check if preceded by a call and emit nop if so.
    // Walk backwards with PrevCrossBBInst until it yields a default-constructed
    // iterator (no earlier instruction) or a non-pseudo instruction is found.
    for (MBBI = PrevCrossBBInst(MBBI);
         MBBI != MachineBasicBlock::const_iterator();
         MBBI = PrevCrossBBInst(MBBI)) {
      // Conservatively assume that pseudo instructions don't emit code and keep
      // looking for a call. We may emit an unnecessary nop in some cases.
      if (!MBBI->isPseudo()) {
        if (MBBI->isCall())
          EmitAndCountInstruction(MCInstBuilder(X86::NOOP));
        break;
      }
    }
    // SEH_Epilogue itself produces no instruction.
    return;
  }
  case X86::UBSAN_UD1:
    // Expand to UD1Lm with EAX as the register operand and a memory operand
    // laid out as base=EAX, scale=1, no index, displacement taken from MI's
    // immediate operand 0, no segment (X86 memory-operand order: base, scale,
    // index, disp, segment).
    EmitAndCountInstruction(MCInstBuilder(X86::UD1Lm)
                                .addReg(X86::EAX)
                                .addReg(X86::EAX)
                                .addImm(1)
                                .addReg(X86::NoRegister)
                                .addImm(MI->getOperand(0).getImm())
                                .addReg(X86::NoRegister));
    return;
  }

  // Generic path: every opcode that left the switch via `break` (or matched no
  // case) is lowered to a single MCInst.
  MCInst TmpInst;
  MCInstLowering.Lower(MI, TmpInst);

  // Stackmap shadows cannot include branch targets, so we can count the bytes
  // in a call towards the shadow, but must ensure that no thread returns
  // into the stackmap shadow.  The only way to achieve this is if the call
  // is at the end of the shadow.
  if (MI->isCall()) {
    // Count the size of the call towards the shadow
    SMShadowTracker.count(TmpInst, getSubtargetInfo(), CodeEmitter.get());
    // Then flush the shadow so that we fill with nops before the call, not
    // after it.
    SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
    // Then emit the call
    // (directly through OutStreamer, since it was already counted above).
    OutStreamer->emitInstruction(TmpInst, getSubtargetInfo());
    return;
  }

  EmitAndCountInstruction(TmpInst);
}
2694