1 //===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "MCTargetDesc/X86BaseInfo.h"
10 #include "MCTargetDesc/X86FixupKinds.h"
11 #include "llvm/ADT/StringSwitch.h"
12 #include "llvm/BinaryFormat/ELF.h"
13 #include "llvm/BinaryFormat/MachO.h"
14 #include "llvm/MC/MCAsmBackend.h"
15 #include "llvm/MC/MCAsmLayout.h"
16 #include "llvm/MC/MCAssembler.h"
17 #include "llvm/MC/MCCodeEmitter.h"
18 #include "llvm/MC/MCContext.h"
19 #include "llvm/MC/MCDwarf.h"
20 #include "llvm/MC/MCELFObjectWriter.h"
21 #include "llvm/MC/MCExpr.h"
22 #include "llvm/MC/MCFixupKindInfo.h"
23 #include "llvm/MC/MCInst.h"
24 #include "llvm/MC/MCInstrInfo.h"
25 #include "llvm/MC/MCMachObjectWriter.h"
26 #include "llvm/MC/MCObjectStreamer.h"
27 #include "llvm/MC/MCObjectWriter.h"
28 #include "llvm/MC/MCRegisterInfo.h"
29 #include "llvm/MC/MCSectionMachO.h"
30 #include "llvm/MC/MCSubtargetInfo.h"
31 #include "llvm/MC/MCValue.h"
32 #include "llvm/Support/CommandLine.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/Support/TargetRegistry.h"
35 #include "llvm/Support/raw_ostream.h"
36 
37 using namespace llvm;
38 
39 namespace {
40 /// A wrapper for holding a mask of the values from X86::AlignBranchBoundaryKind
41 class X86AlignBranchKind {
42 private:
43   uint8_t AlignBranchKind = 0;
44 
45 public:
46   void operator=(const std::string &Val) {
47     if (Val.empty())
48       return;
49     SmallVector<StringRef, 6> BranchTypes;
50     StringRef(Val).split(BranchTypes, '+', -1, false);
51     for (auto BranchType : BranchTypes) {
52       if (BranchType == "fused")
53         addKind(X86::AlignBranchFused);
54       else if (BranchType == "jcc")
55         addKind(X86::AlignBranchJcc);
56       else if (BranchType == "jmp")
57         addKind(X86::AlignBranchJmp);
58       else if (BranchType == "call")
59         addKind(X86::AlignBranchCall);
60       else if (BranchType == "ret")
61         addKind(X86::AlignBranchRet);
62       else if (BranchType == "indirect")
63         addKind(X86::AlignBranchIndirect);
64       else {
65         errs() << "invalid argument " << BranchType.str()
66                << " to -x86-align-branch=; each element must be one of: fused, "
                  "jcc, jmp, call, ret, indirect (plus separated).\n";
68       }
69     }
70   }
71 
72   operator uint8_t() const { return AlignBranchKind; }
73   void addKind(X86::AlignBranchBoundaryKind Value) { AlignBranchKind |= Value; }
74 };
75 
76 X86AlignBranchKind X86AlignBranchKindLoc;
77 
78 cl::opt<unsigned> X86AlignBranchBoundary(
79     "x86-align-branch-boundary", cl::init(0),
80     cl::desc(
81         "Control how the assembler should align branches with NOP. If the "
82         "boundary's size is not 0, it should be a power of 2 and no less "
        "than 32. Branches will be aligned to prevent them from crossing or "
        "ending at the boundary of the specified size. The default value 0 "
        "does not align branches."));
86 
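// For example, "-x86-align-branch-boundary=32 -x86-align-branch=fused+jcc"
// requests that macro-fused compare-and-branch pairs and conditional jumps be
// kept from crossing or ending at a 32-byte boundary.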
87 cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch(
88     "x86-align-branch",
89     cl::desc(
90         "Specify types of branches to align (plus separated list of types):"
91              "\njcc      indicates conditional jumps"
92              "\nfused    indicates fused conditional jumps"
93              "\njmp      indicates direct unconditional jumps"
94              "\ncall     indicates direct and indirect calls"
95              "\nret      indicates rets"
96              "\nindirect indicates indirect unconditional jumps"),
97     cl::location(X86AlignBranchKindLoc));
98 
99 cl::opt<bool> X86AlignBranchWithin32BBoundaries(
100     "x86-branches-within-32B-boundaries", cl::init(false),
101     cl::desc(
102         "Align selected instructions to mitigate negative performance impact "
        "of Intel's microcode update for errata skx102.  May break "
104         "assumptions about labels corresponding to particular instructions, "
105         "and should be used with caution."));
106 
107 cl::opt<unsigned> X86PadMaxPrefixSize(
108     "x86-pad-max-prefix-size", cl::init(0),
109     cl::desc("Maximum number of prefixes to use for padding"));
110 
111 cl::opt<bool> X86PadForAlign(
112     "x86-pad-for-align", cl::init(false), cl::Hidden,
113     cl::desc("Pad previous instructions to implement align directives"));
114 
115 cl::opt<bool> X86PadForBranchAlign(
116     "x86-pad-for-branch-align", cl::init(true), cl::Hidden,
117     cl::desc("Pad previous instructions to implement branch alignment"));
118 
119 class X86ELFObjectWriter : public MCELFObjectTargetWriter {
120 public:
121   X86ELFObjectWriter(bool is64Bit, uint8_t OSABI, uint16_t EMachine,
122                      bool HasRelocationAddend, bool foobar)
123     : MCELFObjectTargetWriter(is64Bit, OSABI, EMachine, HasRelocationAddend) {}
124 };
125 
126 class X86AsmBackend : public MCAsmBackend {
127   const MCSubtargetInfo &STI;
128   std::unique_ptr<const MCInstrInfo> MCII;
129   X86AlignBranchKind AlignBranchType;
130   Align AlignBoundary;
131   unsigned TargetPrefixMax = 0;
132 
133   MCInst PrevInst;
134   MCBoundaryAlignFragment *PendingBA = nullptr;
135   std::pair<MCFragment *, size_t> PrevInstPosition;
136   bool CanPadInst;
137 
138   uint8_t determinePaddingPrefix(const MCInst &Inst) const;
139   bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
140   bool needAlign(const MCInst &Inst) const;
141   bool canPadBranches(MCObjectStreamer &OS) const;
142   bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const;
143 
144 public:
145   X86AsmBackend(const Target &T, const MCSubtargetInfo &STI)
146       : MCAsmBackend(support::little), STI(STI),
147         MCII(T.createMCInstrInfo()) {
148     if (X86AlignBranchWithin32BBoundaries) {
149       // At the moment, this defaults to aligning fused branches, unconditional
150       // jumps, and (unfused) conditional jumps with nops.  Both the
151       // instructions aligned and the alignment method (nop vs prefix) may
152       // change in the future.
      AlignBoundary = assumeAligned(32);
154       AlignBranchType.addKind(X86::AlignBranchFused);
155       AlignBranchType.addKind(X86::AlignBranchJcc);
156       AlignBranchType.addKind(X86::AlignBranchJmp);
157     }
158     // Allow overriding defaults set by master flag
159     if (X86AlignBranchBoundary.getNumOccurrences())
160       AlignBoundary = assumeAligned(X86AlignBranchBoundary);
161     if (X86AlignBranch.getNumOccurrences())
162       AlignBranchType = X86AlignBranchKindLoc;
163     if (X86PadMaxPrefixSize.getNumOccurrences())
164       TargetPrefixMax = X86PadMaxPrefixSize;
165   }
166 
167   bool allowAutoPadding() const override;
168   bool allowEnhancedRelaxation() const override;
169   void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst) override;
170   void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) override;
171 
172   unsigned getNumFixupKinds() const override {
173     return X86::NumTargetFixupKinds;
174   }
175 
176   Optional<MCFixupKind> getFixupKind(StringRef Name) const override;
177 
178   const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
179 
180   bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
181                              const MCValue &Target) override;
182 
183   void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
184                   const MCValue &Target, MutableArrayRef<char> Data,
185                   uint64_t Value, bool IsResolved,
186                   const MCSubtargetInfo *STI) const override;
187 
188   bool mayNeedRelaxation(const MCInst &Inst,
189                          const MCSubtargetInfo &STI) const override;
190 
191   bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
192                             const MCRelaxableFragment *DF,
193                             const MCAsmLayout &Layout) const override;
194 
195   void relaxInstruction(MCInst &Inst,
196                         const MCSubtargetInfo &STI) const override;
197 
198   bool padInstructionViaRelaxation(MCRelaxableFragment &RF,
199                                    MCCodeEmitter &Emitter,
200                                    unsigned &RemainingSize) const;
201 
202   bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
203                                unsigned &RemainingSize) const;
204 
205   bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
206                               unsigned &RemainingSize) const;
207 
208   void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const override;
209 
210   unsigned getMaximumNopSize() const override;
211 
212   bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
213 };
214 } // end anonymous namespace
215 
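/// Return the long-form opcode for a short branch, e.g. the rel8 form JMP_1
/// relaxes to the rel32 form JMP_4 (rel16 JMP_2 in 16-bit mode). Opcodes with
/// no relaxed form are returned unchanged.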
216 static unsigned getRelaxedOpcodeBranch(const MCInst &Inst, bool Is16BitMode) {
217   unsigned Op = Inst.getOpcode();
218   switch (Op) {
219   default:
220     return Op;
221   case X86::JCC_1:
222     return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4;
223   case X86::JMP_1:
224     return (Is16BitMode) ? X86::JMP_2 : X86::JMP_4;
225   }
226 }
227 
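/// Map the sign-extended-imm8 forms of common ALU instructions to their full
/// imm16/imm32 forms, e.g. ADD32ri8 ("add $imm8, %r32", opcode 0x83 /0)
/// relaxes to ADD32ri ("add $imm32, %r32", opcode 0x81 /0). Opcodes with no
/// relaxed form are returned unchanged.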
228 static unsigned getRelaxedOpcodeArith(const MCInst &Inst) {
229   unsigned Op = Inst.getOpcode();
230   switch (Op) {
231   default:
232     return Op;
233 
234     // IMUL
235   case X86::IMUL16rri8: return X86::IMUL16rri;
236   case X86::IMUL16rmi8: return X86::IMUL16rmi;
237   case X86::IMUL32rri8: return X86::IMUL32rri;
238   case X86::IMUL32rmi8: return X86::IMUL32rmi;
239   case X86::IMUL64rri8: return X86::IMUL64rri32;
240   case X86::IMUL64rmi8: return X86::IMUL64rmi32;
241 
242     // AND
243   case X86::AND16ri8: return X86::AND16ri;
244   case X86::AND16mi8: return X86::AND16mi;
245   case X86::AND32ri8: return X86::AND32ri;
246   case X86::AND32mi8: return X86::AND32mi;
247   case X86::AND64ri8: return X86::AND64ri32;
248   case X86::AND64mi8: return X86::AND64mi32;
249 
250     // OR
251   case X86::OR16ri8: return X86::OR16ri;
252   case X86::OR16mi8: return X86::OR16mi;
253   case X86::OR32ri8: return X86::OR32ri;
254   case X86::OR32mi8: return X86::OR32mi;
255   case X86::OR64ri8: return X86::OR64ri32;
256   case X86::OR64mi8: return X86::OR64mi32;
257 
258     // XOR
259   case X86::XOR16ri8: return X86::XOR16ri;
260   case X86::XOR16mi8: return X86::XOR16mi;
261   case X86::XOR32ri8: return X86::XOR32ri;
262   case X86::XOR32mi8: return X86::XOR32mi;
263   case X86::XOR64ri8: return X86::XOR64ri32;
264   case X86::XOR64mi8: return X86::XOR64mi32;
265 
266     // ADD
267   case X86::ADD16ri8: return X86::ADD16ri;
268   case X86::ADD16mi8: return X86::ADD16mi;
269   case X86::ADD32ri8: return X86::ADD32ri;
270   case X86::ADD32mi8: return X86::ADD32mi;
271   case X86::ADD64ri8: return X86::ADD64ri32;
272   case X86::ADD64mi8: return X86::ADD64mi32;
273 
274    // ADC
275   case X86::ADC16ri8: return X86::ADC16ri;
276   case X86::ADC16mi8: return X86::ADC16mi;
277   case X86::ADC32ri8: return X86::ADC32ri;
278   case X86::ADC32mi8: return X86::ADC32mi;
279   case X86::ADC64ri8: return X86::ADC64ri32;
280   case X86::ADC64mi8: return X86::ADC64mi32;
281 
282     // SUB
283   case X86::SUB16ri8: return X86::SUB16ri;
284   case X86::SUB16mi8: return X86::SUB16mi;
285   case X86::SUB32ri8: return X86::SUB32ri;
286   case X86::SUB32mi8: return X86::SUB32mi;
287   case X86::SUB64ri8: return X86::SUB64ri32;
288   case X86::SUB64mi8: return X86::SUB64mi32;
289 
290    // SBB
291   case X86::SBB16ri8: return X86::SBB16ri;
292   case X86::SBB16mi8: return X86::SBB16mi;
293   case X86::SBB32ri8: return X86::SBB32ri;
294   case X86::SBB32mi8: return X86::SBB32mi;
295   case X86::SBB64ri8: return X86::SBB64ri32;
296   case X86::SBB64mi8: return X86::SBB64mi32;
297 
298     // CMP
299   case X86::CMP16ri8: return X86::CMP16ri;
300   case X86::CMP16mi8: return X86::CMP16mi;
301   case X86::CMP32ri8: return X86::CMP32ri;
302   case X86::CMP32mi8: return X86::CMP32mi;
303   case X86::CMP64ri8: return X86::CMP64ri32;
304   case X86::CMP64mi8: return X86::CMP64mi32;
305 
306     // PUSH
307   case X86::PUSH32i8:  return X86::PUSHi32;
308   case X86::PUSH16i8:  return X86::PUSHi16;
309   case X86::PUSH64i8:  return X86::PUSH64i32;
310   }
311 }
312 
313 static unsigned getRelaxedOpcode(const MCInst &Inst, bool Is16BitMode) {
314   unsigned R = getRelaxedOpcodeArith(Inst);
315   if (R != Inst.getOpcode())
316     return R;
317   return getRelaxedOpcodeBranch(Inst, Is16BitMode);
318 }
319 
320 static X86::CondCode getCondFromBranch(const MCInst &MI,
321                                        const MCInstrInfo &MCII) {
322   unsigned Opcode = MI.getOpcode();
323   switch (Opcode) {
324   default:
325     return X86::COND_INVALID;
326   case X86::JCC_1: {
327     const MCInstrDesc &Desc = MCII.get(Opcode);
328     return static_cast<X86::CondCode>(
329         MI.getOperand(Desc.getNumOperands() - 1).getImm());
330   }
331   }
332 }
333 
334 static X86::SecondMacroFusionInstKind
335 classifySecondInstInMacroFusion(const MCInst &MI, const MCInstrInfo &MCII) {
336   X86::CondCode CC = getCondFromBranch(MI, MCII);
337   return classifySecondCondCodeInMacroFusion(CC);
338 }
339 
340 /// Check if the instruction uses RIP relative addressing.
341 static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) {
342   unsigned Opcode = MI.getOpcode();
343   const MCInstrDesc &Desc = MCII.get(Opcode);
344   uint64_t TSFlags = Desc.TSFlags;
345   unsigned CurOp = X86II::getOperandBias(Desc);
346   int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
347   if (MemoryOperand < 0)
348     return false;
349   unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg;
350   unsigned BaseReg = MI.getOperand(BaseRegNum).getReg();
351   return (BaseReg == X86::RIP);
352 }
353 
354 /// Check if the instruction is a prefix.
355 static bool isPrefix(const MCInst &MI, const MCInstrInfo &MCII) {
356   return X86II::isPrefix(MCII.get(MI.getOpcode()).TSFlags);
357 }
358 
359 /// Check if the instruction is valid as the first instruction in macro fusion.
360 static bool isFirstMacroFusibleInst(const MCInst &Inst,
361                                     const MCInstrInfo &MCII) {
362   // An Intel instruction with RIP relative addressing is not macro fusible.
363   if (isRIPRelative(Inst, MCII))
364     return false;
365   X86::FirstMacroFusionInstKind FIK =
366       X86::classifyFirstOpcodeInMacroFusion(Inst.getOpcode());
367   return FIK != X86::FirstMacroFusionInstKind::Invalid;
368 }
369 
/// X86 can reduce the number of NOP bytes needed for alignment by padding
/// instructions with prefixes, which gives better performance in some cases.
/// Here, we determine which prefix is the most suitable.
373 ///
374 /// If the instruction has a segment override prefix, use the existing one.
/// If the target is 64-bit, use CS.
376 /// If the target is 32-bit,
377 ///   - If the instruction has a ESP/EBP base register, use SS.
378 ///   - Otherwise use DS.
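///
/// For instance, in 64-bit mode every instruction is padded with the CS
/// override byte (0x2e), which is ignored in 64-bit mode; in 32-bit mode
/// "movl %eax, (%esp)" would be padded with SS (0x36), which is redundant
/// because %esp-based addresses already default to the SS segment. Either
/// way the extra bytes do not change the instruction's behavior.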
379 uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const {
380   assert((STI.hasFeature(X86::Mode32Bit) || STI.hasFeature(X86::Mode64Bit)) &&
381          "Prefixes can be added only in 32-bit or 64-bit mode.");
382   const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
383   uint64_t TSFlags = Desc.TSFlags;
384 
385   // Determine where the memory operand starts, if present.
386   int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
387   if (MemoryOperand != -1)
388     MemoryOperand += X86II::getOperandBias(Desc);
389 
390   unsigned SegmentReg = 0;
391   if (MemoryOperand >= 0) {
392     // Check for explicit segment override on memory operand.
393     SegmentReg = Inst.getOperand(MemoryOperand + X86::AddrSegmentReg).getReg();
394   }
395 
396   switch (TSFlags & X86II::FormMask) {
397   default:
398     break;
399   case X86II::RawFrmDstSrc: {
400     // Check segment override opcode prefix as needed (not for %ds).
401     if (Inst.getOperand(2).getReg() != X86::DS)
402       SegmentReg = Inst.getOperand(2).getReg();
403     break;
404   }
405   case X86II::RawFrmSrc: {
406     // Check segment override opcode prefix as needed (not for %ds).
407     if (Inst.getOperand(1).getReg() != X86::DS)
408       SegmentReg = Inst.getOperand(1).getReg();
409     break;
410   }
411   case X86II::RawFrmMemOffs: {
412     // Check segment override opcode prefix as needed.
413     SegmentReg = Inst.getOperand(1).getReg();
414     break;
415   }
416   }
417 
418   if (SegmentReg != 0)
419     return X86::getSegmentOverridePrefixForReg(SegmentReg);
420 
421   if (STI.hasFeature(X86::Mode64Bit))
422     return X86::CS_Encoding;
423 
424   if (MemoryOperand >= 0) {
425     unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg;
426     unsigned BaseReg = Inst.getOperand(BaseRegNum).getReg();
427     if (BaseReg == X86::ESP || BaseReg == X86::EBP)
428       return X86::SS_Encoding;
429   }
430   return X86::DS_Encoding;
431 }
432 
/// Check if the two instructions will be macro-fused on the target CPU.
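/// For example, "cmp %rcx, %rax" immediately followed by "je .Ltarget" is a
/// typical fusible pair; whether fusion is assumed here depends on the
/// first-instruction and condition-code classifications checked below.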
434 bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const {
435   const MCInstrDesc &InstDesc = MCII->get(Jcc.getOpcode());
436   if (!InstDesc.isConditionalBranch())
437     return false;
438   if (!isFirstMacroFusibleInst(Cmp, *MCII))
439     return false;
440   const X86::FirstMacroFusionInstKind CmpKind =
441       X86::classifyFirstOpcodeInMacroFusion(Cmp.getOpcode());
442   const X86::SecondMacroFusionInstKind BranchKind =
443       classifySecondInstInMacroFusion(Jcc, *MCII);
444   return X86::isMacroFused(CmpKind, BranchKind);
445 }
446 
447 /// Check if the instruction has a variant symbol operand.
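/// A "variant" is a reference such as "foo@PLT" or "foo@TLSGD", i.e. a symbol
/// operand whose MCSymbolRefExpr kind is something other than VK_None.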
448 static bool hasVariantSymbol(const MCInst &MI) {
449   for (auto &Operand : MI) {
450     if (!Operand.isExpr())
451       continue;
452     const MCExpr &Expr = *Operand.getExpr();
453     if (Expr.getKind() == MCExpr::SymbolRef &&
454         cast<MCSymbolRefExpr>(Expr).getKind() != MCSymbolRefExpr::VK_None)
455       return true;
456   }
457   return false;
458 }
459 
460 bool X86AsmBackend::allowAutoPadding() const {
461   return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone);
462 }
463 
464 bool X86AsmBackend::allowEnhancedRelaxation() const {
465   return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign;
466 }
467 
468 /// X86 has certain instructions which enable interrupts exactly one
469 /// instruction *after* the instruction which stores to SS.  Return true if the
470 /// given instruction has such an interrupt delay slot.
471 static bool hasInterruptDelaySlot(const MCInst &Inst) {
472   switch (Inst.getOpcode()) {
473   case X86::POPSS16:
474   case X86::POPSS32:
475   case X86::STI:
476     return true;
477 
478   case X86::MOV16sr:
479   case X86::MOV32sr:
480   case X86::MOV64sr:
481   case X86::MOV16sm:
482     if (Inst.getOperand(0).getReg() == X86::SS)
483       return true;
484     break;
485   }
486   return false;
487 }
488 
489 /// Check if the instruction to be emitted is right after any data.
490 static bool
491 isRightAfterData(MCFragment *CurrentFragment,
492                  const std::pair<MCFragment *, size_t> &PrevInstPosition) {
493   MCFragment *F = CurrentFragment;
  // Empty data fragments may be created to prevent further data from being
  // added into the previous fragment; we need to skip them since they have
  // no contents.
497   for (; isa_and_nonnull<MCDataFragment>(F); F = F->getPrevNode())
498     if (cast<MCDataFragment>(F)->getContents().size() != 0)
499       break;
500 
501   // Since data is always emitted into a DataFragment, our check strategy is
502   // simple here.
503   //   - If the fragment is a DataFragment
504   //     - If it's not the fragment where the previous instruction is,
505   //       returns true.
506   //     - If it's the fragment holding the previous instruction but its
  //       size changed since the previous instruction was emitted into
508   //       it, returns true.
509   //     - Otherwise returns false.
510   //   - If the fragment is not a DataFragment, returns false.
511   if (auto *DF = dyn_cast_or_null<MCDataFragment>(F))
512     return DF != PrevInstPosition.first ||
513            DF->getContents().size() != PrevInstPosition.second;
514 
515   return false;
516 }
517 
518 /// \returns the fragment size if it has instructions, otherwise returns 0.
519 static size_t getSizeForInstFragment(const MCFragment *F) {
520   if (!F || !F->hasInstructions())
521     return 0;
522   // MCEncodedFragmentWithContents being templated makes this tricky.
523   switch (F->getKind()) {
524   default:
525     llvm_unreachable("Unknown fragment with instructions!");
526   case MCFragment::FT_Data:
527     return cast<MCDataFragment>(*F).getContents().size();
528   case MCFragment::FT_Relaxable:
529     return cast<MCRelaxableFragment>(*F).getContents().size();
530   case MCFragment::FT_CompactEncodedInst:
531     return cast<MCCompactEncodedInstFragment>(*F).getContents().size();
532   }
533 }
534 
/// Return true if we can insert NOPs or prefixes automatically before the
/// instruction to be emitted.
537 bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const {
538   if (hasVariantSymbol(Inst))
    // The linker may rewrite the instruction with a variant symbol operand
    // (e.g. TLSCALL).
541     return false;
542 
543   if (hasInterruptDelaySlot(PrevInst))
544     // If this instruction follows an interrupt enabling instruction with a one
545     // instruction delay, inserting a nop would change behavior.
546     return false;
547 
548   if (isPrefix(PrevInst, *MCII))
    // If this instruction follows a prefix, inserting a nop/prefix would
    // change semantics.
551     return false;
552 
553   if (isPrefix(Inst, *MCII))
    // If this instruction is a prefix, inserting a prefix would change
    // semantics.
556     return false;
557 
558   if (isRightAfterData(OS.getCurrentFragment(), PrevInstPosition))
    // If this instruction follows any data, there is no clear instruction
    // boundary, and inserting a nop/prefix would change semantics.
561     return false;
562 
563   return true;
564 }
565 
566 bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const {
567   if (!OS.getAllowAutoPadding())
568     return false;
569   assert(allowAutoPadding() && "incorrect initialization!");
570 
571   // We only pad in text section.
572   if (!OS.getCurrentSectionOnly()->getKind().isText())
573     return false;
574 
  // TODO: Currently we don't deal with bundle cases.
576   if (OS.getAssembler().isBundlingEnabled())
577     return false;
578 
579   // Branches only need to be aligned in 32-bit or 64-bit mode.
580   if (!(STI.hasFeature(X86::Mode64Bit) || STI.hasFeature(X86::Mode32Bit)))
581     return false;
582 
583   return true;
584 }
585 
586 /// Check if the instruction operand needs to be aligned.
/// Check if the instruction needs to be aligned.
588   const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
589   return (Desc.isConditionalBranch() &&
590           (AlignBranchType & X86::AlignBranchJcc)) ||
591          (Desc.isUnconditionalBranch() &&
592           (AlignBranchType & X86::AlignBranchJmp)) ||
593          (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) ||
594          (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) ||
595          (Desc.isIndirectBranch() &&
596           (AlignBranchType & X86::AlignBranchIndirect));
597 }
598 
599 /// Insert BoundaryAlignFragment before instructions to align branches.
600 void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
601                                          const MCInst &Inst) {
602   CanPadInst = canPadInst(Inst, OS);
603 
604   if (!canPadBranches(OS))
605     return;
606 
607   if (!isMacroFused(PrevInst, Inst))
    // Macro fusion does not actually happen, so clear the pending fragment.
609     PendingBA = nullptr;
610 
611   if (!CanPadInst)
612     return;
613 
614   if (PendingBA && OS.getCurrentFragment()->getPrevNode() == PendingBA) {
615     // Macro fusion actually happens and there is no other fragment inserted
616     // after the previous instruction.
617     //
    // Do nothing here since we already inserted a BoundaryAlign fragment when
619     // we met the first instruction in the fused pair and we'll tie them
620     // together in emitInstructionEnd.
621     //
622     // Note: When there is at least one fragment, such as MCAlignFragment,
623     // inserted after the previous instruction, e.g.
624     //
625     // \code
626     //   cmp %rax %rcx
627     //   .align 16
628     //   je .Label0
629     // \ endcode
630     //
    // We will treat the JCC as an unfused branch although it may be fused
632     // with the CMP.
633     return;
634   }
635 
636   if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) &&
637                           isFirstMacroFusibleInst(Inst, *MCII))) {
    // If we meet an unfused branch or the first instruction in a fusible pair,
639     // insert a BoundaryAlign fragment.
640     OS.insert(PendingBA = new MCBoundaryAlignFragment(AlignBoundary));
641   }
642 }
643 
644 /// Set the last fragment to be aligned for the BoundaryAlignFragment.
void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS,
                                       const MCInst &Inst) {
646   PrevInst = Inst;
647   MCFragment *CF = OS.getCurrentFragment();
648   PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF));
649   if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF))
650     F->setAllowAutoPadding(CanPadInst);
651 
652   if (!canPadBranches(OS))
653     return;
654 
655   if (!needAlign(Inst) || !PendingBA)
656     return;
657 
  // Tie the aligned instructions into the pending BoundaryAlign.
659   PendingBA->setLastFragment(CF);
660   PendingBA = nullptr;
661 
662   // We need to ensure that further data isn't added to the current
663   // DataFragment, so that we can get the size of instructions later in
664   // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty
665   // DataFragment.
666   if (isa_and_nonnull<MCDataFragment>(CF))
667     OS.insert(new MCDataFragment());
668 
669   // Update the maximum alignment on the current section if necessary.
670   MCSection *Sec = OS.getCurrentSectionOnly();
671   if (AlignBoundary.value() > Sec->getAlignment())
672     Sec->setAlignment(AlignBoundary);
673 }
674 
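/// Map relocation names used in ".reloc" directives (e.g. R_X86_64_PLT32 or
/// the BFD_RELOC_* aliases below) to literal relocation kinds that bypass the
/// normal fixup processing; unknown names fall back to the generic handling.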
675 Optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const {
676   if (STI.getTargetTriple().isOSBinFormatELF()) {
677     unsigned Type;
678     if (STI.getTargetTriple().getArch() == Triple::x86_64) {
679       Type = llvm::StringSwitch<unsigned>(Name)
680 #define ELF_RELOC(X, Y) .Case(#X, Y)
681 #include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
682 #undef ELF_RELOC
683                  .Case("BFD_RELOC_NONE", ELF::R_X86_64_NONE)
684                  .Case("BFD_RELOC_8", ELF::R_X86_64_8)
685                  .Case("BFD_RELOC_16", ELF::R_X86_64_16)
686                  .Case("BFD_RELOC_32", ELF::R_X86_64_32)
687                  .Case("BFD_RELOC_64", ELF::R_X86_64_64)
688                  .Default(-1u);
689     } else {
690       Type = llvm::StringSwitch<unsigned>(Name)
691 #define ELF_RELOC(X, Y) .Case(#X, Y)
692 #include "llvm/BinaryFormat/ELFRelocs/i386.def"
693 #undef ELF_RELOC
694                  .Case("BFD_RELOC_NONE", ELF::R_386_NONE)
695                  .Case("BFD_RELOC_8", ELF::R_386_8)
696                  .Case("BFD_RELOC_16", ELF::R_386_16)
697                  .Case("BFD_RELOC_32", ELF::R_386_32)
698                  .Default(-1u);
699     }
700     if (Type == -1u)
701       return None;
702     return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
703   }
704   return MCAsmBackend::getFixupKind(Name);
705 }
706 
707 const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
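  // Entries must be in the same order as the fixup kinds declared in
  // X86FixupKinds.h, since they are indexed by (Kind - FirstTargetFixupKind).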
708   const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
709       {"reloc_riprel_4byte", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
710       {"reloc_riprel_4byte_movq_load", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
711       {"reloc_riprel_4byte_relax", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
712       {"reloc_riprel_4byte_relax_rex", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
713       {"reloc_signed_4byte", 0, 32, 0},
714       {"reloc_signed_4byte_relax", 0, 32, 0},
715       {"reloc_global_offset_table", 0, 32, 0},
716       {"reloc_global_offset_table8", 0, 64, 0},
717       {"reloc_branch_4byte_pcrel", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
718   };
719 
720   // Fixup kinds from .reloc directive are like R_386_NONE/R_X86_64_NONE. They
721   // do not require any extra processing.
722   if (Kind >= FirstLiteralRelocationKind)
723     return MCAsmBackend::getFixupKindInfo(FK_NONE);
724 
725   if (Kind < FirstTargetFixupKind)
726     return MCAsmBackend::getFixupKindInfo(Kind);
727 
728   assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
729          "Invalid kind!");
730   assert(Infos[Kind - FirstTargetFixupKind].Name && "Empty fixup name!");
731   return Infos[Kind - FirstTargetFixupKind];
732 }
733 
734 bool X86AsmBackend::shouldForceRelocation(const MCAssembler &,
735                                           const MCFixup &Fixup,
736                                           const MCValue &) {
737   return Fixup.getKind() >= FirstLiteralRelocationKind;
738 }
739 
740 static unsigned getFixupKindSize(unsigned Kind) {
741   switch (Kind) {
742   default:
743     llvm_unreachable("invalid fixup kind!");
744   case FK_NONE:
745     return 0;
746   case FK_PCRel_1:
747   case FK_SecRel_1:
748   case FK_Data_1:
749     return 1;
750   case FK_PCRel_2:
751   case FK_SecRel_2:
752   case FK_Data_2:
753     return 2;
754   case FK_PCRel_4:
755   case X86::reloc_riprel_4byte:
756   case X86::reloc_riprel_4byte_relax:
757   case X86::reloc_riprel_4byte_relax_rex:
758   case X86::reloc_riprel_4byte_movq_load:
759   case X86::reloc_signed_4byte:
760   case X86::reloc_signed_4byte_relax:
761   case X86::reloc_global_offset_table:
762   case X86::reloc_branch_4byte_pcrel:
763   case FK_SecRel_4:
764   case FK_Data_4:
765     return 4;
766   case FK_PCRel_8:
767   case FK_SecRel_8:
768   case FK_Data_8:
769   case X86::reloc_global_offset_table8:
770     return 8;
771   }
772 }
773 
774 void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
775                                const MCValue &Target,
776                                MutableArrayRef<char> Data,
777                                uint64_t Value, bool IsResolved,
778                                const MCSubtargetInfo *STI) const {
779   unsigned Kind = Fixup.getKind();
780   if (Kind >= FirstLiteralRelocationKind)
781     return;
782   unsigned Size = getFixupKindSize(Kind);
783 
784   assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");
785 
786   int64_t SignedValue = static_cast<int64_t>(Value);
787   if ((Target.isAbsolute() || IsResolved) &&
788       getFixupKindInfo(Fixup.getKind()).Flags &
789       MCFixupKindInfo::FKF_IsPCRel) {
    // Check that the PC-relative fixup value fits into the fixup size.
    if (Size > 0 && !isIntN(Size * 8, SignedValue))
      Asm.getContext().reportError(
          Fixup.getLoc(), "value of " + Twine(SignedValue) +
                              " is too large for field of " + Twine(Size) +
                              ((Size == 1) ? " byte." : " bytes."));
796   } else {
    // Check that upper bits are either all zeros or all ones.
798     // Specifically ignore overflow/underflow as long as the leakage is
799     // limited to the lower bits. This is to remain compatible with
800     // other assemblers.
801     assert((Size == 0 || isIntN(Size * 8 + 1, SignedValue)) &&
802            "Value does not fit in the Fixup field");
803   }
804 
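  // Write the value over the fixup's bytes in little-endian order; e.g. a
  // 4-byte fixup with Value 0x11223344 is stored as the bytes 44 33 22 11.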
805   for (unsigned i = 0; i != Size; ++i)
806     Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
807 }
808 
809 bool X86AsmBackend::mayNeedRelaxation(const MCInst &Inst,
810                                       const MCSubtargetInfo &STI) const {
811   // Branches can always be relaxed in either mode.
812   if (getRelaxedOpcodeBranch(Inst, false) != Inst.getOpcode())
813     return true;
814 
815   // Check if this instruction is ever relaxable.
816   if (getRelaxedOpcodeArith(Inst) == Inst.getOpcode())
817     return false;
818 
820   // Check if the relaxable operand has an expression. For the current set of
821   // relaxable instructions, the relaxable operand is always the last operand.
822   unsigned RelaxableOp = Inst.getNumOperands() - 1;
823   if (Inst.getOperand(RelaxableOp).isExpr())
824     return true;
825 
826   return false;
827 }
828 
829 bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
830                                          uint64_t Value,
831                                          const MCRelaxableFragment *DF,
832                                          const MCAsmLayout &Layout) const {
833   // Relax if the value is too big for a (signed) i8.
834   return !isInt<8>(Value);
835 }
836 
837 // FIXME: Can tblgen help at all here to verify there aren't other instructions
838 // we can relax?
839 void X86AsmBackend::relaxInstruction(MCInst &Inst,
840                                      const MCSubtargetInfo &STI) const {
  // The only relaxation X86 does is from a 1-byte pcrel to a 4-byte pcrel.
842   bool Is16BitMode = STI.getFeatureBits()[X86::Mode16Bit];
843   unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode);
844 
845   if (RelaxedOp == Inst.getOpcode()) {
846     SmallString<256> Tmp;
847     raw_svector_ostream OS(Tmp);
848     Inst.dump_pretty(OS);
849     OS << "\n";
850     report_fatal_error("unexpected instruction to relax: " + OS.str());
851   }
852 
853   Inst.setOpcode(RelaxedOp);
854 }
855 
/// Return true if this instruction has been fully relaxed into its most
857 /// general available form.
858 static bool isFullyRelaxed(const MCRelaxableFragment &RF) {
859   auto &Inst = RF.getInst();
860   auto &STI = *RF.getSubtargetInfo();
861   bool Is16BitMode = STI.getFeatureBits()[X86::Mode16Bit];
862   return getRelaxedOpcode(Inst, Is16BitMode) == Inst.getOpcode();
863 }
864 
865 bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
866                                             MCCodeEmitter &Emitter,
867                                             unsigned &RemainingSize) const {
868   if (!RF.getAllowAutoPadding())
869     return false;
870   // If the instruction isn't fully relaxed, shifting it around might require a
  // larger value for one of the fixups than can be encoded.  The outer loop
872   // will also catch this before moving to the next instruction, but we need to
873   // prevent padding this single instruction as well.
874   if (!isFullyRelaxed(RF))
875     return false;
876 
877   const unsigned OldSize = RF.getContents().size();
878   if (OldSize == 15)
879     return false;
880 
881   const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize);
882   const unsigned RemainingPrefixSize = [&]() -> unsigned {
883     SmallString<15> Code;
884     raw_svector_ostream VecOS(Code);
885     Emitter.emitPrefix(RF.getInst(), VecOS, STI);
886     assert(Code.size() < 15 && "The number of prefixes must be less than 15.");
887 
888     // TODO: It turns out we need a decent amount of plumbing for the target
    // specific bits to determine how many prefixes are safe to add.  Various
890     // targets (older chips mostly, but also Atom family) encounter decoder
891     // stalls with too many prefixes.  For testing purposes, we set the value
892     // externally for the moment.
893     unsigned ExistingPrefixSize = Code.size();
894     if (TargetPrefixMax <= ExistingPrefixSize)
895       return 0;
896     return TargetPrefixMax - ExistingPrefixSize;
897   }();
898   const unsigned PrefixBytesToAdd =
899       std::min(MaxPossiblePad, RemainingPrefixSize);
900   if (PrefixBytesToAdd == 0)
901     return false;
902 
903   const uint8_t Prefix = determinePaddingPrefix(RF.getInst());
904 
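  // The prefix chosen above either repeats the instruction's existing segment
  // override or matches the default segment (or is ignored in 64-bit mode),
  // so prepending it multiple times lengthens the encoding, up to the 15-byte
  // instruction limit, without changing behavior.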
905   SmallString<256> Code;
906   Code.append(PrefixBytesToAdd, Prefix);
907   Code.append(RF.getContents().begin(), RF.getContents().end());
908   RF.getContents() = Code;
909 
910   // Adjust the fixups for the change in offsets
911   for (auto &F : RF.getFixups()) {
912     F.setOffset(F.getOffset() + PrefixBytesToAdd);
913   }
914 
915   RemainingSize -= PrefixBytesToAdd;
916   return true;
917 }
918 
919 bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
920                                                 MCCodeEmitter &Emitter,
921                                                 unsigned &RemainingSize) const {
922   if (isFullyRelaxed(RF))
923     // TODO: There are lots of other tricks we could apply for increasing
924     // encoding size without impacting performance.
925     return false;
926 
927   MCInst Relaxed = RF.getInst();
928   relaxInstruction(Relaxed, *RF.getSubtargetInfo());
929 
930   SmallVector<MCFixup, 4> Fixups;
931   SmallString<15> Code;
932   raw_svector_ostream VecOS(Code);
933   Emitter.encodeInstruction(Relaxed, VecOS, Fixups, *RF.getSubtargetInfo());
934   const unsigned OldSize = RF.getContents().size();
935   const unsigned NewSize = Code.size();
936   assert(NewSize >= OldSize && "size decrease during relaxation?");
937   unsigned Delta = NewSize - OldSize;
938   if (Delta > RemainingSize)
939     return false;
940   RF.setInst(Relaxed);
941   RF.getContents() = Code;
942   RF.getFixups() = Fixups;
943   RemainingSize -= Delta;
944   return true;
945 }
946 
947 bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
948                                            MCCodeEmitter &Emitter,
949                                            unsigned &RemainingSize) const {
950   bool Changed = false;
951   if (RemainingSize != 0)
952     Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize);
953   if (RemainingSize != 0)
954     Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize);
955   return Changed;
956 }
957 
958 void X86AsmBackend::finishLayout(MCAssembler const &Asm,
959                                  MCAsmLayout &Layout) const {
960   // See if we can further relax some instructions to cut down on the number of
961   // nop bytes required for code alignment.  The actual win is in reducing
962   // instruction count, not number of bytes.  Modern X86-64 can easily end up
963   // decode limited.  It is often better to reduce the number of instructions
964   // (i.e. eliminate nops) even at the cost of increasing the size and
965   // complexity of others.
966   if (!X86PadForAlign && !X86PadForBranchAlign)
967     return;
968 
  // The processed regions are delimited by LabeledFragments. -g may have more
970   // MCSymbols and therefore different relaxation results. X86PadForAlign is
971   // disabled by default to eliminate the -g vs non -g difference.
972   DenseSet<MCFragment *> LabeledFragments;
973   for (const MCSymbol &S : Asm.symbols())
974     LabeledFragments.insert(S.getFragment(false));
975 
976   for (MCSection &Sec : Asm) {
977     if (!Sec.getKind().isText())
978       continue;
979 
980     SmallVector<MCRelaxableFragment *, 4> Relaxable;
981     for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) {
982       MCFragment &F = *I;
983 
984       if (LabeledFragments.count(&F))
985         Relaxable.clear();
986 
987       if (F.getKind() == MCFragment::FT_Data ||
988           F.getKind() == MCFragment::FT_CompactEncodedInst)
989         // Skip and ignore
990         continue;
991 
992       if (F.getKind() == MCFragment::FT_Relaxable) {
993         auto &RF = cast<MCRelaxableFragment>(*I);
994         Relaxable.push_back(&RF);
995         continue;
996       }
997 
998       auto canHandle = [](MCFragment &F) -> bool {
999         switch (F.getKind()) {
1000         default:
1001           return false;
1002         case MCFragment::FT_Align:
1003           return X86PadForAlign;
1004         case MCFragment::FT_BoundaryAlign:
1005           return X86PadForBranchAlign;
1006         }
1007       };
1008       // For any unhandled kind, assume we can't change layout.
1009       if (!canHandle(F)) {
1010         Relaxable.clear();
1011         continue;
1012       }
1013 
1014 #ifndef NDEBUG
1015       const uint64_t OrigOffset = Layout.getFragmentOffset(&F);
1016 #endif
1017       const uint64_t OrigSize = Asm.computeFragmentSize(Layout, F);
1018 
1019       // To keep the effects local, prefer to relax instructions closest to
1020       // the align directive.  This is purely about human understandability
1021       // of the resulting code.  If we later find a reason to expand
1022       // particular instructions over others, we can adjust.
1023       MCFragment *FirstChangedFragment = nullptr;
1024       unsigned RemainingSize = OrigSize;
1025       while (!Relaxable.empty() && RemainingSize != 0) {
1026         auto &RF = *Relaxable.pop_back_val();
1027         // Give the backend a chance to play any tricks it wishes to increase
1028         // the encoding size of the given instruction.  Target independent code
        // will try further relaxation, but targets may play further tricks.
1030         if (padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize))
1031           FirstChangedFragment = &RF;
1032 
1033         // If we have an instruction which hasn't been fully relaxed, we can't
1034         // skip past it and insert bytes before it.  Changing its starting
1035         // offset might require a larger negative offset than it can encode.
1036         // We don't need to worry about larger positive offsets as none of the
1037         // possible offsets between this and our align are visible, and the
1038         // ones afterwards aren't changing.
1039         if (!isFullyRelaxed(RF))
1040           break;
1041       }
1042       Relaxable.clear();
1043 
1044       if (FirstChangedFragment) {
        // Make sure the offsets for any fragments in the affected range get
1046         // updated.  Note that this (conservatively) invalidates the offsets of
1047         // those following, but this is not required.
1048         Layout.invalidateFragmentsFrom(FirstChangedFragment);
1049       }
1050 
      // BoundaryAlign explicitly tracks its size (unlike align).
1052       if (F.getKind() == MCFragment::FT_BoundaryAlign)
1053         cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize);
1054 
1055 #ifndef NDEBUG
1056       const uint64_t FinalOffset = Layout.getFragmentOffset(&F);
1057       const uint64_t FinalSize = Asm.computeFragmentSize(Layout, F);
1058       assert(OrigOffset + OrigSize == FinalOffset + FinalSize &&
1059              "can't move start of next fragment!");
1060       assert(FinalSize == RemainingSize && "inconsistent size computation?");
1061 #endif
1062 
1063       // If we're looking at a boundary align, make sure we don't try to pad
1064       // its target instructions for some following directive.  Doing so would
1065       // break the alignment of the current boundary align.
1066       if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(&F)) {
1067         const MCFragment *LastFragment = BF->getLastFragment();
1068         if (!LastFragment)
1069           continue;
1070         while (&*I != LastFragment)
1071           ++I;
1072       }
1073     }
1074   }
1075 
1076   // The layout is done. Mark every fragment as valid.
1077   for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
1078     MCSection &Section = *Layout.getSectionOrder()[i];
1079     Layout.getFragmentOffset(&*Section.getFragmentList().rbegin());
1080     Asm.computeFragmentSize(Layout, *Section.getFragmentList().rbegin());
1081   }
1082 }
1083 
1084 unsigned X86AsmBackend::getMaximumNopSize() const {
1085   if (STI.hasFeature(X86::Mode16Bit))
1086     return 4;
1087   if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Mode64Bit))
1088     return 1;
1089   if (STI.getFeatureBits()[X86::FeatureFast7ByteNOP])
1090     return 7;
1091   if (STI.getFeatureBits()[X86::FeatureFast15ByteNOP])
1092     return 15;
1093   if (STI.getFeatureBits()[X86::FeatureFast11ByteNOP])
1094     return 11;
1095   // FIXME: handle 32-bit mode
  // 15 bytes is the longest single NOP instruction, but 10 bytes is
  // commonly the longest that can be efficiently decoded.
1098   return 10;
1099 }
1100 
1101 /// Write a sequence of optimal nops to the output, covering \p Count
1102 /// bytes.
1103 /// \return - true on success, false on failure
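/// For example, with a 10-byte maximum NOP length, a request for 23 bytes is
/// emitted as two 10-byte NOPs followed by a 3-byte NOP.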
1104 bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
1105   static const char Nops32Bit[10][11] = {
1106       // nop
1107       "\x90",
1108       // xchg %ax,%ax
1109       "\x66\x90",
1110       // nopl (%[re]ax)
1111       "\x0f\x1f\x00",
1112       // nopl 0(%[re]ax)
1113       "\x0f\x1f\x40\x00",
1114       // nopl 0(%[re]ax,%[re]ax,1)
1115       "\x0f\x1f\x44\x00\x00",
1116       // nopw 0(%[re]ax,%[re]ax,1)
1117       "\x66\x0f\x1f\x44\x00\x00",
1118       // nopl 0L(%[re]ax)
1119       "\x0f\x1f\x80\x00\x00\x00\x00",
1120       // nopl 0L(%[re]ax,%[re]ax,1)
1121       "\x0f\x1f\x84\x00\x00\x00\x00\x00",
1122       // nopw 0L(%[re]ax,%[re]ax,1)
1123       "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00",
1124       // nopw %cs:0L(%[re]ax,%[re]ax,1)
1125       "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00",
1126   };
1127 
1128   // 16-bit mode uses different nop patterns than 32-bit.
1129   static const char Nops16Bit[4][11] = {
1130       // nop
1131       "\x90",
1132       // xchg %eax,%eax
1133       "\x66\x90",
1134       // lea 0(%si),%si
1135       "\x8d\x74\x00",
1136       // lea 0w(%si),%si
1137       "\x8d\xb4\x00\x00",
1138   };
1139 
1140   const char(*Nops)[11] =
1141       STI.getFeatureBits()[X86::Mode16Bit] ? Nops16Bit : Nops32Bit;
1142 
1143   uint64_t MaxNopLength = (uint64_t)getMaximumNopSize();
1144 
1145   // Emit as many MaxNopLength NOPs as needed, then emit a NOP of the remaining
1146   // length.
1147   do {
1148     const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength);
1149     const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
1150     for (uint8_t i = 0; i < Prefixes; i++)
1151       OS << '\x66';
1152     const uint8_t Rest = ThisNopLength - Prefixes;
1153     if (Rest != 0)
1154       OS.write(Nops[Rest - 1], Rest);
1155     Count -= ThisNopLength;
1156   } while (Count != 0);
1157 
1158   return true;
1159 }
1160 
1161 /* *** */
1162 
1163 namespace {
1164 
1165 class ELFX86AsmBackend : public X86AsmBackend {
1166 public:
1167   uint8_t OSABI;
1168   ELFX86AsmBackend(const Target &T, uint8_t OSABI, const MCSubtargetInfo &STI)
1169       : X86AsmBackend(T, STI), OSABI(OSABI) {}
1170 };
1171 
1172 class ELFX86_32AsmBackend : public ELFX86AsmBackend {
1173 public:
1174   ELFX86_32AsmBackend(const Target &T, uint8_t OSABI,
1175                       const MCSubtargetInfo &STI)
1176     : ELFX86AsmBackend(T, OSABI, STI) {}
1177 
1178   std::unique_ptr<MCObjectTargetWriter>
1179   createObjectTargetWriter() const override {
1180     return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, ELF::EM_386);
1181   }
1182 };
1183 
1184 class ELFX86_X32AsmBackend : public ELFX86AsmBackend {
1185 public:
1186   ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI,
1187                        const MCSubtargetInfo &STI)
1188       : ELFX86AsmBackend(T, OSABI, STI) {}
1189 
1190   std::unique_ptr<MCObjectTargetWriter>
1191   createObjectTargetWriter() const override {
1192     return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
1193                                     ELF::EM_X86_64);
1194   }
1195 };
1196 
1197 class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend {
1198 public:
1199   ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI,
1200                          const MCSubtargetInfo &STI)
1201       : ELFX86AsmBackend(T, OSABI, STI) {}
1202 
1203   std::unique_ptr<MCObjectTargetWriter>
1204   createObjectTargetWriter() const override {
1205     return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
1206                                     ELF::EM_IAMCU);
1207   }
1208 };
1209 
1210 class ELFX86_64AsmBackend : public ELFX86AsmBackend {
1211 public:
1212   ELFX86_64AsmBackend(const Target &T, uint8_t OSABI,
1213                       const MCSubtargetInfo &STI)
1214     : ELFX86AsmBackend(T, OSABI, STI) {}
1215 
1216   std::unique_ptr<MCObjectTargetWriter>
1217   createObjectTargetWriter() const override {
1218     return createX86ELFObjectWriter(/*IsELF64*/ true, OSABI, ELF::EM_X86_64);
1219   }
1220 };
1221 
1222 class WindowsX86AsmBackend : public X86AsmBackend {
1223   bool Is64Bit;
1224 
1225 public:
1226   WindowsX86AsmBackend(const Target &T, bool is64Bit,
1227                        const MCSubtargetInfo &STI)
1228     : X86AsmBackend(T, STI)
1229     , Is64Bit(is64Bit) {
1230   }
1231 
1232   Optional<MCFixupKind> getFixupKind(StringRef Name) const override {
1233     return StringSwitch<Optional<MCFixupKind>>(Name)
1234         .Case("dir32", FK_Data_4)
1235         .Case("secrel32", FK_SecRel_4)
1236         .Case("secidx", FK_SecRel_2)
1237         .Default(MCAsmBackend::getFixupKind(Name));
1238   }
1239 
1240   std::unique_ptr<MCObjectTargetWriter>
1241   createObjectTargetWriter() const override {
1242     return createX86WinCOFFObjectWriter(Is64Bit);
1243   }
1244 };
1245 
1246 namespace CU {
1247 
1248   /// Compact unwind encoding values.
1249   enum CompactUnwindEncodings {
    /// [RE]BP based frame where [RE]BP is pushed on the stack immediately after
1251     /// the return address, then [RE]SP is moved to [RE]BP.
1252     UNWIND_MODE_BP_FRAME                   = 0x01000000,
1253 
1254     /// A frameless function with a small constant stack size.
1255     UNWIND_MODE_STACK_IMMD                 = 0x02000000,
1256 
1257     /// A frameless function with a large constant stack size.
1258     UNWIND_MODE_STACK_IND                  = 0x03000000,
1259 
1260     /// No compact unwind encoding is available.
1261     UNWIND_MODE_DWARF                      = 0x04000000,
1262 
1263     /// Mask for encoding the frame registers.
1264     UNWIND_BP_FRAME_REGISTERS              = 0x00007FFF,
1265 
1266     /// Mask for encoding the frameless registers.
1267     UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
1268   };
1269 
1270 } // namespace CU
1271 
1272 class DarwinX86AsmBackend : public X86AsmBackend {
1273   const MCRegisterInfo &MRI;
1274 
1275   /// Number of registers that can be saved in a compact unwind encoding.
1276   enum { CU_NUM_SAVED_REGS = 6 };
1277 
1278   mutable unsigned SavedRegs[CU_NUM_SAVED_REGS];
1279   Triple TT;
1280   bool Is64Bit;
1281 
  unsigned OffsetSize;                   ///< Stack slot size of a "push" instruction.
1283   unsigned MoveInstrSize;                ///< Size of a "move" instruction.
1284   unsigned StackDivide;                  ///< Amount to adjust stack size by.
1285 protected:
1286   /// Size of a "push" instruction for the given register.
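  /// Pushes of the REX-extended registers R12-R15 need a REX.B prefix in
  /// addition to the one-byte opcode, so they encode as two bytes.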
1287   unsigned PushInstrSize(unsigned Reg) const {
1288     switch (Reg) {
1289       case X86::EBX:
1290       case X86::ECX:
1291       case X86::EDX:
1292       case X86::EDI:
1293       case X86::ESI:
1294       case X86::EBP:
1295       case X86::RBX:
1296       case X86::RBP:
1297         return 1;
1298       case X86::R12:
1299       case X86::R13:
1300       case X86::R14:
1301       case X86::R15:
1302         return 2;
1303     }
1304     return 1;
1305   }
1306 
1307 private:
1308   /// Get the compact unwind number for a given register. The number
1309   /// corresponds to the enum lists in compact_unwind_encoding.h.
1310   int getCompactUnwindRegNum(unsigned Reg) const {
1311     static const MCPhysReg CU32BitRegs[7] = {
1312       X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
1313     };
1314     static const MCPhysReg CU64BitRegs[] = {
1315       X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
1316     };
1317     const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
1318     for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
1319       if (*CURegs == Reg)
1320         return Idx;
1321 
1322     return -1;
1323   }
1324 
1325   /// Return the registers encoded for a compact encoding with a frame
1326   /// pointer.
1327   uint32_t encodeCompactUnwindRegistersWithFrame() const {
1328     // Encode the registers in the order they were saved --- 3-bits per
1329     // register. The list of saved registers is assumed to be in reverse
1330     // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS.
1331     uint32_t RegEnc = 0;
1332     for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) {
1333       unsigned Reg = SavedRegs[i];
1334       if (Reg == 0) break;
1335 
1336       int CURegNum = getCompactUnwindRegNum(Reg);
1337       if (CURegNum == -1) return ~0U;
1338 
1339       // Encode the 3-bit register number in order, skipping over 3-bits for
1340       // each register.
1341       RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
1342     }
1343 
1344     assert((RegEnc & 0x3FFFF) == RegEnc &&
1345            "Invalid compact register encoding!");
1346     return RegEnc;
1347   }
1348 
1349   /// Create the permutation encoding used with frameless stacks. It is
1350   /// passed the number of registers to be saved and an array of the registers
1351   /// saved.
1352   uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const {
1353     // The saved registers are numbered from 1 to 6. In order to encode the
1354     // order in which they were saved, we re-number them according to their
1355     // place in the register order. The re-numbering is relative to the last
1356     // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in
1357     // that order:
1358     //
1359     //    Orig  Re-Num
1360     //    ----  ------
1361     //     6       6
1362     //     2       2
1363     //     4       3
1364     //     5       3
1365     //
1366     for (unsigned i = 0; i < RegCount; ++i) {
1367       int CUReg = getCompactUnwindRegNum(SavedRegs[i]);
1368       if (CUReg == -1) return ~0U;
1369       SavedRegs[i] = CUReg;
1370     }
1371 
1372     // Reverse the list.
1373     std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]);
1374 
1375     uint32_t RenumRegs[CU_NUM_SAVED_REGS];
1376     for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i){
1377       unsigned Countless = 0;
1378       for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
1379         if (SavedRegs[j] < SavedRegs[i])
1380           ++Countless;
1381 
1382       RenumRegs[i] = SavedRegs[i] - Countless - 1;
1383     }
1384 
1385     // Take the renumbered values and encode them into a 10-bit number.
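    // This is essentially a factorial-base (Lehmer-code style) encoding: with
    // at most 6 saved registers there are no more than 6! = 720 orderings, so
    // the result always fits in 10 bits.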
1386     uint32_t permutationEncoding = 0;
1387     switch (RegCount) {
1388     case 6:
1389       permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
1390                              + 6 * RenumRegs[2] +  2 * RenumRegs[3]
1391                              +     RenumRegs[4];
1392       break;
1393     case 5:
1394       permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
1395                              + 6 * RenumRegs[3] +  2 * RenumRegs[4]
1396                              +     RenumRegs[5];
1397       break;
1398     case 4:
1399       permutationEncoding |=  60 * RenumRegs[2] + 12 * RenumRegs[3]
1400                              + 3 * RenumRegs[4] +      RenumRegs[5];
1401       break;
1402     case 3:
1403       permutationEncoding |=  20 * RenumRegs[3] +  4 * RenumRegs[4]
1404                              +     RenumRegs[5];
1405       break;
1406     case 2:
1407       permutationEncoding |=   5 * RenumRegs[4] +      RenumRegs[5];
1408       break;
1409     case 1:
1410       permutationEncoding |=       RenumRegs[5];
1411       break;
1412     }
1413 
1414     assert((permutationEncoding & 0x3FF) == permutationEncoding &&
1415            "Invalid compact register encoding!");
1416     return permutationEncoding;
1417   }
1418 
public:
  DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI,
                      const MCSubtargetInfo &STI)
      : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()),
        Is64Bit(TT.isArch64Bit()) {
    memset(SavedRegs, 0, sizeof(SavedRegs));
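    // Sizes the encoders below rely on: a pushed register occupies 8 bytes on
    // x86-64 and 4 on x86-32, the frame-pointer move ("movq %rsp, %rbp" vs.
    // "movl %esp, %ebp") is 3 vs. 2 bytes long, and stack sizes are encoded
    // in units of the push size.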
    OffsetSize = Is64Bit ? 8 : 4;
    MoveInstrSize = Is64Bit ? 3 : 2;
    StackDivide = Is64Bit ? 8 : 4;
  }

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    uint32_t CPUType = cantFail(MachO::getCPUType(TT));
    uint32_t CPUSubType = cantFail(MachO::getCPUSubType(TT));
    return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubType);
  }

  /// Implementation of the algorithm that generates the compact unwind
  /// encoding for the CFI instructions.
  uint32_t generateCompactUnwindEncoding(
      ArrayRef<MCCFIInstruction> Instrs) const override {
    if (Instrs.empty()) return 0;

    // Reset the saved registers.
    unsigned SavedRegIdx = 0;
    memset(SavedRegs, 0, sizeof(SavedRegs));

    bool HasFP = false;

    // The compact unwind encoding accumulated from the CFI directives below.
    uint32_t CompactUnwindEncoding = 0;

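    // Byte offset of the 32-bit immediate within the "sub" instruction that
    // allocates the frame: the bytes before it are REX + opcode + ModRM on
    // x86-64 and opcode + ModRM on x86-32. InstrOffset is added later so the
    // STACK_IND mode can point at the immediate from the function start.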
    unsigned SubtractInstrIdx = Is64Bit ? 3 : 2;
    unsigned InstrOffset = 0;
    unsigned StackAdjust = 0;
    unsigned StackSize = 0;
    unsigned NumDefCFAOffsets = 0;
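    // Smallest absolute .cfi_offset seen; used below to check that the saved
    // registers sit directly next to the frame pointer.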
    int MinAbsOffset = std::numeric_limits<int>::max();

    for (const MCCFIInstruction &Inst : Instrs) {
      switch (Inst.getOperation()) {
      default:
        // Any other CFI directives indicate a frame that we aren't prepared
        // to represent via compact unwind, so just bail out.
        return 0;
      case MCCFIInstruction::OpDefCfaRegister: {
        // Defines a frame pointer. E.g.
        //
        //     movq %rsp, %rbp
        //  L0:
        //     .cfi_def_cfa_register %rbp
        //
        HasFP = true;

        // If the frame pointer is other than esp/rsp, we do not have a way to
        // generate a compact unwinding representation, so bail out.
        if (*MRI.getLLVMRegNum(Inst.getRegister(), true) !=
            (Is64Bit ? X86::RBP : X86::EBP))
          return 0;

        // Reset the counts.
        memset(SavedRegs, 0, sizeof(SavedRegs));
        StackAdjust = 0;
        SavedRegIdx = 0;
        MinAbsOffset = std::numeric_limits<int>::max();
        InstrOffset += MoveInstrSize;
        break;
      }
      case MCCFIInstruction::OpDefCfaOffset: {
        // Defines a new offset for the CFA. E.g.
        //
        //  With frame:
        //
        //     pushq %rbp
        //  L0:
        //     .cfi_def_cfa_offset 16
        //
        //  Without frame:
        //
        //     subq $72, %rsp
        //  L0:
        //     .cfi_def_cfa_offset 80
        //
        StackSize = Inst.getOffset() / StackDivide;
        ++NumDefCFAOffsets;
        break;
      }
      case MCCFIInstruction::OpOffset: {
        // Defines a "push" of a callee-saved register. E.g.
        //
        //     pushq %r15
        //     pushq %r14
        //     pushq %rbx
        //  L0:
        //     subq $120, %rsp
        //  L1:
        //     .cfi_offset %rbx, -40
        //     .cfi_offset %r14, -32
        //     .cfi_offset %r15, -24
        //
        if (SavedRegIdx == CU_NUM_SAVED_REGS)
          // If there are too many saved registers, we cannot use a compact
          // unwind encoding.
          return CU::UNWIND_MODE_DWARF;

        unsigned Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true);
        SavedRegs[SavedRegIdx++] = Reg;
        StackAdjust += OffsetSize;
        MinAbsOffset = std::min(MinAbsOffset, std::abs(Inst.getOffset()));
        InstrOffset += PushInstrSize(Reg);
        break;
      }
      }
    }

    StackAdjust /= StackDivide;

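    // Pick one of the three compact unwind modes: BP_FRAME when a frame
    // pointer was established, STACK_IMMD for frameless frames whose size
    // fits in the encoding's 8-bit field, and STACK_IND for larger frameless
    // frames, where the unwinder reads the stack size out of the "sub"
    // instruction itself.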
    if (HasFP) {
      if ((StackAdjust & 0xFF) != StackAdjust)
        // Offset was too big for a compact unwind encoding.
        return CU::UNWIND_MODE_DWARF;

      // We don't attempt to track a real StackAdjust, so if the saved
      // registers aren't adjacent to rbp we cannot use a compact unwind
      // encoding.
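      // With an rbp-based frame, the return address and the saved rbp occupy
      // the two slots between the CFA and the first pushed callee-saved
      // register, so "adjacent" means the closest offset is 3 * OffsetSize.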
      if (SavedRegIdx != 0 && MinAbsOffset != 3 * (int)OffsetSize)
        return CU::UNWIND_MODE_DWARF;

      // Get the encoding of the saved registers when we have a frame pointer.
      uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame();
      if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;

      CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME;
      CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
      CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
    } else {
      SubtractInstrIdx += InstrOffset;
      ++StackAdjust;

      if ((StackSize & 0xFF) == StackSize) {
        // Frameless stack with a small stack size.
        CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD;

        // Encode the stack size.
        CompactUnwindEncoding |= (StackSize & 0xFF) << 16;
      } else {
        if ((StackAdjust & 0x7) != StackAdjust)
          // The extra stack adjustments are too big for us to handle.
          return CU::UNWIND_MODE_DWARF;

        // Frameless stack with an offset too large for us to encode compactly.
        CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND;

        // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
        // instruction.
        CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;

        // Encode any extra stack adjustments (done via push instructions).
        CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
      }

      // Encode the number of registers saved. (Reverse the list first.)
      std::reverse(&SavedRegs[0], &SavedRegs[SavedRegIdx]);
      CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;

      // Get the encoding of the saved registers when we don't have a frame
      // pointer.
      uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegIdx);
      if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;

      // Encode the register permutation.
      CompactUnwindEncoding |=
        RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION;
    }

    return CompactUnwindEncoding;
  }
};

} // end anonymous namespace

MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
                                           const MCSubtargetInfo &STI,
                                           const MCRegisterInfo &MRI,
                                           const MCTargetOptions &Options) {
  const Triple &TheTriple = STI.getTargetTriple();
  if (TheTriple.isOSBinFormatMachO())
    return new DarwinX86AsmBackend(T, MRI, STI);

  if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
    return new WindowsX86AsmBackend(T, false, STI);

  uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());

  if (TheTriple.isOSIAMCU())
    return new ELFX86_IAMCUAsmBackend(T, OSABI, STI);

  return new ELFX86_32AsmBackend(T, OSABI, STI);
}

MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
                                           const MCSubtargetInfo &STI,
                                           const MCRegisterInfo &MRI,
                                           const MCTargetOptions &Options) {
  const Triple &TheTriple = STI.getTargetTriple();
  if (TheTriple.isOSBinFormatMachO())
    return new DarwinX86AsmBackend(T, MRI, STI);

  if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
    return new WindowsX86AsmBackend(T, true, STI);

  uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());

  if (TheTriple.isX32())
    return new ELFX86_X32AsmBackend(T, OSABI, STI);
  return new ELFX86_64AsmBackend(T, OSABI, STI);
}