1 //===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "MCTargetDesc/X86BaseInfo.h"
10 #include "MCTargetDesc/X86FixupKinds.h"
11 #include "llvm/ADT/StringSwitch.h"
12 #include "llvm/BinaryFormat/ELF.h"
13 #include "llvm/BinaryFormat/MachO.h"
14 #include "llvm/MC/MCAsmBackend.h"
15 #include "llvm/MC/MCAsmLayout.h"
16 #include "llvm/MC/MCAssembler.h"
17 #include "llvm/MC/MCCodeEmitter.h"
18 #include "llvm/MC/MCContext.h"
19 #include "llvm/MC/MCDwarf.h"
20 #include "llvm/MC/MCELFObjectWriter.h"
21 #include "llvm/MC/MCExpr.h"
22 #include "llvm/MC/MCFixupKindInfo.h"
23 #include "llvm/MC/MCInst.h"
24 #include "llvm/MC/MCInstrInfo.h"
25 #include "llvm/MC/MCMachObjectWriter.h"
26 #include "llvm/MC/MCObjectStreamer.h"
27 #include "llvm/MC/MCObjectWriter.h"
28 #include "llvm/MC/MCRegisterInfo.h"
29 #include "llvm/MC/MCSectionMachO.h"
30 #include "llvm/MC/MCSubtargetInfo.h"
31 #include "llvm/MC/MCValue.h"
32 #include "llvm/MC/TargetRegistry.h"
33 #include "llvm/Support/CommandLine.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Support/raw_ostream.h"
36 
37 using namespace llvm;
38 
39 namespace {
40 /// A wrapper for holding a mask of the values from X86::AlignBranchBoundaryKind
41 class X86AlignBranchKind {
42 private:
43   uint8_t AlignBranchKind = 0;
44 
45 public:
46   void operator=(const std::string &Val) {
47     if (Val.empty())
48       return;
49     SmallVector<StringRef, 6> BranchTypes;
50     StringRef(Val).split(BranchTypes, '+', -1, false);
51     for (auto BranchType : BranchTypes) {
52       if (BranchType == "fused")
53         addKind(X86::AlignBranchFused);
54       else if (BranchType == "jcc")
55         addKind(X86::AlignBranchJcc);
56       else if (BranchType == "jmp")
57         addKind(X86::AlignBranchJmp);
58       else if (BranchType == "call")
59         addKind(X86::AlignBranchCall);
60       else if (BranchType == "ret")
61         addKind(X86::AlignBranchRet);
62       else if (BranchType == "indirect")
63         addKind(X86::AlignBranchIndirect);
64       else {
65         errs() << "invalid argument " << BranchType.str()
66                << " to -x86-align-branch=; each element must be one of: fused, "
67                   "jcc, jmp, call, ret, indirect.(plus separated)\n";
68       }
69     }
70   }
71 
72   operator uint8_t() const { return AlignBranchKind; }
73   void addKind(X86::AlignBranchBoundaryKind Value) { AlignBranchKind |= Value; }
74 };
75 
76 X86AlignBranchKind X86AlignBranchKindLoc;
77 
78 cl::opt<unsigned> X86AlignBranchBoundary(
79     "x86-align-branch-boundary", cl::init(0),
80     cl::desc(
81         "Control how the assembler should align branches with NOP. If the "
82         "boundary's size is not 0, it should be a power of 2 and no less "
83         "than 32. Branches will be aligned to prevent from being across or "
84         "against the boundary of specified size. The default value 0 does not "
85         "align branches."));
86 
87 cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch(
88     "x86-align-branch",
89     cl::desc(
90         "Specify types of branches to align (plus separated list of types):"
91              "\njcc      indicates conditional jumps"
92              "\nfused    indicates fused conditional jumps"
93              "\njmp      indicates direct unconditional jumps"
94              "\ncall     indicates direct and indirect calls"
95              "\nret      indicates rets"
96              "\nindirect indicates indirect unconditional jumps"),
97     cl::location(X86AlignBranchKindLoc));
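// Illustrative usage: the two options above are typically combined, e.g.
//   llc -x86-align-branch-boundary=32 -x86-align-branch=fused+jcc+jmp
// pads with NOPs so that the selected branch types neither cross nor end at
// a 32-byte boundary.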
98 
99 cl::opt<bool> X86AlignBranchWithin32BBoundaries(
100     "x86-branches-within-32B-boundaries", cl::init(false),
101     cl::desc(
102         "Align selected instructions to mitigate negative performance impact "
103         "of Intel's micro code update for errata skx102.  May break "
104         "assumptions about labels corresponding to particular instructions, "
105         "and should be used with caution."));
106 
107 cl::opt<unsigned> X86PadMaxPrefixSize(
108     "x86-pad-max-prefix-size", cl::init(0),
109     cl::desc("Maximum number of prefixes to use for padding"));
110 
111 cl::opt<bool> X86PadForAlign(
112     "x86-pad-for-align", cl::init(false), cl::Hidden,
113     cl::desc("Pad previous instructions to implement align directives"));
114 
115 cl::opt<bool> X86PadForBranchAlign(
116     "x86-pad-for-branch-align", cl::init(true), cl::Hidden,
117     cl::desc("Pad previous instructions to implement branch alignment"));
118 
119 class X86AsmBackend : public MCAsmBackend {
120   const MCSubtargetInfo &STI;
121   std::unique_ptr<const MCInstrInfo> MCII;
122   X86AlignBranchKind AlignBranchType;
123   Align AlignBoundary;
124   unsigned TargetPrefixMax = 0;
125 
126   MCInst PrevInst;
127   MCBoundaryAlignFragment *PendingBA = nullptr;
128   std::pair<MCFragment *, size_t> PrevInstPosition;
129   bool CanPadInst;
130 
131   uint8_t determinePaddingPrefix(const MCInst &Inst) const;
132   bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
133   bool needAlign(const MCInst &Inst) const;
134   bool canPadBranches(MCObjectStreamer &OS) const;
135   bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const;
136 
137 public:
138   X86AsmBackend(const Target &T, const MCSubtargetInfo &STI)
139       : MCAsmBackend(support::little), STI(STI),
140         MCII(T.createMCInstrInfo()) {
141     if (X86AlignBranchWithin32BBoundaries) {
142       // At the moment, this defaults to aligning fused branches, unconditional
143       // jumps, and (unfused) conditional jumps with nops.  Both the
144       // instructions aligned and the alignment method (nop vs prefix) may
145       // change in the future.
      AlignBoundary = assumeAligned(32);
147       AlignBranchType.addKind(X86::AlignBranchFused);
148       AlignBranchType.addKind(X86::AlignBranchJcc);
149       AlignBranchType.addKind(X86::AlignBranchJmp);
150     }
151     // Allow overriding defaults set by main flag
152     if (X86AlignBranchBoundary.getNumOccurrences())
153       AlignBoundary = assumeAligned(X86AlignBranchBoundary);
154     if (X86AlignBranch.getNumOccurrences())
155       AlignBranchType = X86AlignBranchKindLoc;
156     if (X86PadMaxPrefixSize.getNumOccurrences())
157       TargetPrefixMax = X86PadMaxPrefixSize;
158   }
159 
160   bool allowAutoPadding() const override;
161   bool allowEnhancedRelaxation() const override;
162   void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst,
163                             const MCSubtargetInfo &STI) override;
164   void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) override;
165 
166   unsigned getNumFixupKinds() const override {
167     return X86::NumTargetFixupKinds;
168   }
169 
170   Optional<MCFixupKind> getFixupKind(StringRef Name) const override;
171 
172   const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
173 
174   bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
175                              const MCValue &Target) override;
176 
177   void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
178                   const MCValue &Target, MutableArrayRef<char> Data,
179                   uint64_t Value, bool IsResolved,
180                   const MCSubtargetInfo *STI) const override;
181 
182   bool mayNeedRelaxation(const MCInst &Inst,
183                          const MCSubtargetInfo &STI) const override;
184 
185   bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
186                             const MCRelaxableFragment *DF,
187                             const MCAsmLayout &Layout) const override;
188 
189   void relaxInstruction(MCInst &Inst,
190                         const MCSubtargetInfo &STI) const override;
191 
192   bool padInstructionViaRelaxation(MCRelaxableFragment &RF,
193                                    MCCodeEmitter &Emitter,
194                                    unsigned &RemainingSize) const;
195 
196   bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
197                                unsigned &RemainingSize) const;
198 
199   bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
200                               unsigned &RemainingSize) const;
201 
202   void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const override;
203 
204   unsigned getMaximumNopSize(const MCSubtargetInfo &STI) const override;
205 
206   bool writeNopData(raw_ostream &OS, uint64_t Count,
207                     const MCSubtargetInfo *STI) const override;
208 };
209 } // end anonymous namespace
210 
211 static unsigned getRelaxedOpcodeBranch(const MCInst &Inst, bool Is16BitMode) {
212   unsigned Op = Inst.getOpcode();
213   switch (Op) {
214   default:
215     return Op;
216   case X86::JCC_1:
217     return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4;
218   case X86::JMP_1:
219     return (Is16BitMode) ? X86::JMP_2 : X86::JMP_4;
220   }
221 }
222 
223 static unsigned getRelaxedOpcodeArith(const MCInst &Inst) {
224   unsigned Op = Inst.getOpcode();
225   switch (Op) {
226   default:
227     return Op;
228 
229     // IMUL
230   case X86::IMUL16rri8: return X86::IMUL16rri;
231   case X86::IMUL16rmi8: return X86::IMUL16rmi;
232   case X86::IMUL32rri8: return X86::IMUL32rri;
233   case X86::IMUL32rmi8: return X86::IMUL32rmi;
234   case X86::IMUL64rri8: return X86::IMUL64rri32;
235   case X86::IMUL64rmi8: return X86::IMUL64rmi32;
236 
237     // AND
238   case X86::AND16ri8: return X86::AND16ri;
239   case X86::AND16mi8: return X86::AND16mi;
240   case X86::AND32ri8: return X86::AND32ri;
241   case X86::AND32mi8: return X86::AND32mi;
242   case X86::AND64ri8: return X86::AND64ri32;
243   case X86::AND64mi8: return X86::AND64mi32;
244 
245     // OR
246   case X86::OR16ri8: return X86::OR16ri;
247   case X86::OR16mi8: return X86::OR16mi;
248   case X86::OR32ri8: return X86::OR32ri;
249   case X86::OR32mi8: return X86::OR32mi;
250   case X86::OR64ri8: return X86::OR64ri32;
251   case X86::OR64mi8: return X86::OR64mi32;
252 
253     // XOR
254   case X86::XOR16ri8: return X86::XOR16ri;
255   case X86::XOR16mi8: return X86::XOR16mi;
256   case X86::XOR32ri8: return X86::XOR32ri;
257   case X86::XOR32mi8: return X86::XOR32mi;
258   case X86::XOR64ri8: return X86::XOR64ri32;
259   case X86::XOR64mi8: return X86::XOR64mi32;
260 
261     // ADD
262   case X86::ADD16ri8: return X86::ADD16ri;
263   case X86::ADD16mi8: return X86::ADD16mi;
264   case X86::ADD32ri8: return X86::ADD32ri;
265   case X86::ADD32mi8: return X86::ADD32mi;
266   case X86::ADD64ri8: return X86::ADD64ri32;
267   case X86::ADD64mi8: return X86::ADD64mi32;
268 
269    // ADC
270   case X86::ADC16ri8: return X86::ADC16ri;
271   case X86::ADC16mi8: return X86::ADC16mi;
272   case X86::ADC32ri8: return X86::ADC32ri;
273   case X86::ADC32mi8: return X86::ADC32mi;
274   case X86::ADC64ri8: return X86::ADC64ri32;
275   case X86::ADC64mi8: return X86::ADC64mi32;
276 
277     // SUB
278   case X86::SUB16ri8: return X86::SUB16ri;
279   case X86::SUB16mi8: return X86::SUB16mi;
280   case X86::SUB32ri8: return X86::SUB32ri;
281   case X86::SUB32mi8: return X86::SUB32mi;
282   case X86::SUB64ri8: return X86::SUB64ri32;
283   case X86::SUB64mi8: return X86::SUB64mi32;
284 
285    // SBB
286   case X86::SBB16ri8: return X86::SBB16ri;
287   case X86::SBB16mi8: return X86::SBB16mi;
288   case X86::SBB32ri8: return X86::SBB32ri;
289   case X86::SBB32mi8: return X86::SBB32mi;
290   case X86::SBB64ri8: return X86::SBB64ri32;
291   case X86::SBB64mi8: return X86::SBB64mi32;
292 
293     // CMP
294   case X86::CMP16ri8: return X86::CMP16ri;
295   case X86::CMP16mi8: return X86::CMP16mi;
296   case X86::CMP32ri8: return X86::CMP32ri;
297   case X86::CMP32mi8: return X86::CMP32mi;
298   case X86::CMP64ri8: return X86::CMP64ri32;
299   case X86::CMP64mi8: return X86::CMP64mi32;
300 
301     // PUSH
302   case X86::PUSH32i8:  return X86::PUSHi32;
303   case X86::PUSH16i8:  return X86::PUSHi16;
304   case X86::PUSH64i8:  return X86::PUSH64i32;
305   }
306 }
307 
308 static unsigned getRelaxedOpcode(const MCInst &Inst, bool Is16BitMode) {
309   unsigned R = getRelaxedOpcodeArith(Inst);
310   if (R != Inst.getOpcode())
311     return R;
312   return getRelaxedOpcodeBranch(Inst, Is16BitMode);
313 }
314 
315 static X86::CondCode getCondFromBranch(const MCInst &MI,
316                                        const MCInstrInfo &MCII) {
317   unsigned Opcode = MI.getOpcode();
318   switch (Opcode) {
319   default:
320     return X86::COND_INVALID;
321   case X86::JCC_1: {
322     const MCInstrDesc &Desc = MCII.get(Opcode);
323     return static_cast<X86::CondCode>(
324         MI.getOperand(Desc.getNumOperands() - 1).getImm());
325   }
326   }
327 }
328 
329 static X86::SecondMacroFusionInstKind
330 classifySecondInstInMacroFusion(const MCInst &MI, const MCInstrInfo &MCII) {
331   X86::CondCode CC = getCondFromBranch(MI, MCII);
332   return classifySecondCondCodeInMacroFusion(CC);
333 }
334 
335 /// Check if the instruction uses RIP relative addressing.
336 static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) {
337   unsigned Opcode = MI.getOpcode();
338   const MCInstrDesc &Desc = MCII.get(Opcode);
339   uint64_t TSFlags = Desc.TSFlags;
340   unsigned CurOp = X86II::getOperandBias(Desc);
341   int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
342   if (MemoryOperand < 0)
343     return false;
344   unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg;
345   unsigned BaseReg = MI.getOperand(BaseRegNum).getReg();
346   return (BaseReg == X86::RIP);
347 }
348 
349 /// Check if the instruction is a prefix.
350 static bool isPrefix(const MCInst &MI, const MCInstrInfo &MCII) {
351   return X86II::isPrefix(MCII.get(MI.getOpcode()).TSFlags);
352 }
353 
354 /// Check if the instruction is valid as the first instruction in macro fusion.
355 static bool isFirstMacroFusibleInst(const MCInst &Inst,
356                                     const MCInstrInfo &MCII) {
357   // An Intel instruction with RIP relative addressing is not macro fusible.
358   if (isRIPRelative(Inst, MCII))
359     return false;
360   X86::FirstMacroFusionInstKind FIK =
361       X86::classifyFirstOpcodeInMacroFusion(Inst.getOpcode());
362   return FIK != X86::FirstMacroFusionInstKind::Invalid;
363 }
364 
365 /// X86 can reduce the bytes of NOP by padding instructions with prefixes to
/// get better performance in some cases. Here, we determine which prefix is
367 /// the most suitable.
368 ///
369 /// If the instruction has a segment override prefix, use the existing one.
/// If the target is 64-bit, use CS.
/// If the target is 32-bit,
///   - If the instruction has an ESP/EBP base register, use SS.
373 ///   - Otherwise use DS.
374 uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const {
375   assert((STI.hasFeature(X86::Mode32Bit) || STI.hasFeature(X86::Mode64Bit)) &&
376          "Prefixes can be added only in 32-bit or 64-bit mode.");
377   const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
378   uint64_t TSFlags = Desc.TSFlags;
379 
380   // Determine where the memory operand starts, if present.
381   int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
382   if (MemoryOperand != -1)
383     MemoryOperand += X86II::getOperandBias(Desc);
384 
385   unsigned SegmentReg = 0;
386   if (MemoryOperand >= 0) {
387     // Check for explicit segment override on memory operand.
388     SegmentReg = Inst.getOperand(MemoryOperand + X86::AddrSegmentReg).getReg();
389   }
390 
391   switch (TSFlags & X86II::FormMask) {
392   default:
393     break;
394   case X86II::RawFrmDstSrc: {
395     // Check segment override opcode prefix as needed (not for %ds).
396     if (Inst.getOperand(2).getReg() != X86::DS)
397       SegmentReg = Inst.getOperand(2).getReg();
398     break;
399   }
400   case X86II::RawFrmSrc: {
401     // Check segment override opcode prefix as needed (not for %ds).
402     if (Inst.getOperand(1).getReg() != X86::DS)
403       SegmentReg = Inst.getOperand(1).getReg();
404     break;
405   }
406   case X86II::RawFrmMemOffs: {
407     // Check segment override opcode prefix as needed.
408     SegmentReg = Inst.getOperand(1).getReg();
409     break;
410   }
411   }
412 
413   if (SegmentReg != 0)
414     return X86::getSegmentOverridePrefixForReg(SegmentReg);
415 
416   if (STI.hasFeature(X86::Mode64Bit))
417     return X86::CS_Encoding;
418 
419   if (MemoryOperand >= 0) {
420     unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg;
421     unsigned BaseReg = Inst.getOperand(BaseRegNum).getReg();
422     if (BaseReg == X86::ESP || BaseReg == X86::EBP)
423       return X86::SS_Encoding;
424   }
425   return X86::DS_Encoding;
426 }
427 
428 /// Check if the two instructions will be macro-fused on the target cpu.
429 bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const {
430   const MCInstrDesc &InstDesc = MCII->get(Jcc.getOpcode());
431   if (!InstDesc.isConditionalBranch())
432     return false;
433   if (!isFirstMacroFusibleInst(Cmp, *MCII))
434     return false;
435   const X86::FirstMacroFusionInstKind CmpKind =
436       X86::classifyFirstOpcodeInMacroFusion(Cmp.getOpcode());
437   const X86::SecondMacroFusionInstKind BranchKind =
438       classifySecondInstInMacroFusion(Jcc, *MCII);
439   return X86::isMacroFused(CmpKind, BranchKind);
440 }
441 
442 /// Check if the instruction has a variant symbol operand.
443 static bool hasVariantSymbol(const MCInst &MI) {
444   for (auto &Operand : MI) {
445     if (!Operand.isExpr())
446       continue;
447     const MCExpr &Expr = *Operand.getExpr();
448     if (Expr.getKind() == MCExpr::SymbolRef &&
449         cast<MCSymbolRefExpr>(Expr).getKind() != MCSymbolRefExpr::VK_None)
450       return true;
451   }
452   return false;
453 }
454 
455 bool X86AsmBackend::allowAutoPadding() const {
456   return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone);
457 }
458 
459 bool X86AsmBackend::allowEnhancedRelaxation() const {
460   return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign;
461 }
462 
/// X86 has certain instructions which delay the enabling (or recognition) of
/// interrupts until exactly one instruction *after* they execute, e.g. STI
/// and stores to SS.  Return true if the given instruction has such an
/// interrupt delay slot.
466 static bool hasInterruptDelaySlot(const MCInst &Inst) {
467   switch (Inst.getOpcode()) {
468   case X86::POPSS16:
469   case X86::POPSS32:
470   case X86::STI:
471     return true;
472 
473   case X86::MOV16sr:
474   case X86::MOV32sr:
475   case X86::MOV64sr:
476   case X86::MOV16sm:
477     if (Inst.getOperand(0).getReg() == X86::SS)
478       return true;
479     break;
480   }
481   return false;
482 }
483 
484 /// Check if the instruction to be emitted is right after any data.
485 static bool
486 isRightAfterData(MCFragment *CurrentFragment,
487                  const std::pair<MCFragment *, size_t> &PrevInstPosition) {
488   MCFragment *F = CurrentFragment;
489   // Empty data fragments may be created to prevent further data being
  // added into the previous fragment; we need to skip them since they
491   // have no contents.
492   for (; isa_and_nonnull<MCDataFragment>(F); F = F->getPrevNode())
493     if (cast<MCDataFragment>(F)->getContents().size() != 0)
494       break;
495 
496   // Since data is always emitted into a DataFragment, our check strategy is
497   // simple here.
498   //   - If the fragment is a DataFragment
499   //     - If it's not the fragment where the previous instruction is,
500   //       returns true.
501   //     - If it's the fragment holding the previous instruction but its
    //       size changed since the previous instruction was emitted into
503   //       it, returns true.
504   //     - Otherwise returns false.
505   //   - If the fragment is not a DataFragment, returns false.
506   if (auto *DF = dyn_cast_or_null<MCDataFragment>(F))
507     return DF != PrevInstPosition.first ||
508            DF->getContents().size() != PrevInstPosition.second;
509 
510   return false;
511 }
512 
513 /// \returns the fragment size if it has instructions, otherwise returns 0.
514 static size_t getSizeForInstFragment(const MCFragment *F) {
515   if (!F || !F->hasInstructions())
516     return 0;
517   // MCEncodedFragmentWithContents being templated makes this tricky.
518   switch (F->getKind()) {
519   default:
520     llvm_unreachable("Unknown fragment with instructions!");
521   case MCFragment::FT_Data:
522     return cast<MCDataFragment>(*F).getContents().size();
523   case MCFragment::FT_Relaxable:
524     return cast<MCRelaxableFragment>(*F).getContents().size();
525   case MCFragment::FT_CompactEncodedInst:
526     return cast<MCCompactEncodedInstFragment>(*F).getContents().size();
527   }
528 }
529 
/// Return true if we can insert NOP or prefixes automatically before the
/// instruction to be emitted.
532 bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const {
533   if (hasVariantSymbol(Inst))
    // Linker may rewrite the instruction with a variant symbol operand (e.g.
    // TLSCALL).
536     return false;
537 
538   if (hasInterruptDelaySlot(PrevInst))
539     // If this instruction follows an interrupt enabling instruction with a one
540     // instruction delay, inserting a nop would change behavior.
541     return false;
542 
543   if (isPrefix(PrevInst, *MCII))
    // If this instruction follows a prefix, inserting a nop/prefix would
    // change semantics.
546     return false;
547 
548   if (isPrefix(Inst, *MCII))
    // If this instruction is a prefix, inserting a prefix would change
    // semantics.
551     return false;
552 
553   if (isRightAfterData(OS.getCurrentFragment(), PrevInstPosition))
    // If this instruction follows any data, there is no clear instruction
    // boundary, and inserting a nop/prefix would change semantics.
556     return false;
557 
558   return true;
559 }
560 
561 bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const {
562   if (!OS.getAllowAutoPadding())
563     return false;
564   assert(allowAutoPadding() && "incorrect initialization!");
565 
566   // We only pad in text section.
567   if (!OS.getCurrentSectionOnly()->getKind().isText())
568     return false;
569 
  // TODO: Currently we don't deal with bundle cases.
571   if (OS.getAssembler().isBundlingEnabled())
572     return false;
573 
574   // Branches only need to be aligned in 32-bit or 64-bit mode.
575   if (!(STI.hasFeature(X86::Mode64Bit) || STI.hasFeature(X86::Mode32Bit)))
576     return false;
577 
578   return true;
579 }
580 
/// Check if the instruction needs to be aligned.
582 bool X86AsmBackend::needAlign(const MCInst &Inst) const {
583   const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
584   return (Desc.isConditionalBranch() &&
585           (AlignBranchType & X86::AlignBranchJcc)) ||
586          (Desc.isUnconditionalBranch() &&
587           (AlignBranchType & X86::AlignBranchJmp)) ||
588          (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) ||
589          (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) ||
590          (Desc.isIndirectBranch() &&
591           (AlignBranchType & X86::AlignBranchIndirect));
592 }
593 
594 /// Insert BoundaryAlignFragment before instructions to align branches.
595 void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
                                         const MCInst &Inst,
                                         const MCSubtargetInfo &STI) {
597   CanPadInst = canPadInst(Inst, OS);
598 
599   if (!canPadBranches(OS))
600     return;
601 
602   if (!isMacroFused(PrevInst, Inst))
    // Macro fusion doesn't actually happen; clear the pending fragment.
604     PendingBA = nullptr;
605 
606   if (!CanPadInst)
607     return;
608 
609   if (PendingBA && OS.getCurrentFragment()->getPrevNode() == PendingBA) {
610     // Macro fusion actually happens and there is no other fragment inserted
611     // after the previous instruction.
612     //
    // Do nothing here since we already inserted a BoundaryAlign fragment when
614     // we met the first instruction in the fused pair and we'll tie them
615     // together in emitInstructionEnd.
616     //
617     // Note: When there is at least one fragment, such as MCAlignFragment,
618     // inserted after the previous instruction, e.g.
619     //
620     // \code
621     //   cmp %rax %rcx
622     //   .align 16
623     //   je .Label0
    // \endcode
625     //
    // We will treat the JCC as an unfused branch although it may be fused
627     // with the CMP.
628     return;
629   }
630 
631   if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) &&
632                           isFirstMacroFusibleInst(Inst, *MCII))) {
    // If we meet an unfused branch or the first instruction in a fusible pair,
634     // insert a BoundaryAlign fragment.
635     OS.insert(PendingBA = new MCBoundaryAlignFragment(AlignBoundary, STI));
636   }
637 }
638 
639 /// Set the last fragment to be aligned for the BoundaryAlignFragment.
void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS,
                                       const MCInst &Inst) {
641   PrevInst = Inst;
642   MCFragment *CF = OS.getCurrentFragment();
643   PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF));
644   if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF))
645     F->setAllowAutoPadding(CanPadInst);
646 
647   if (!canPadBranches(OS))
648     return;
649 
650   if (!needAlign(Inst) || !PendingBA)
651     return;
652 
  // Tie the aligned instructions into the pending BoundaryAlign.
654   PendingBA->setLastFragment(CF);
655   PendingBA = nullptr;
656 
657   // We need to ensure that further data isn't added to the current
658   // DataFragment, so that we can get the size of instructions later in
659   // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty
660   // DataFragment.
661   if (isa_and_nonnull<MCDataFragment>(CF))
662     OS.insert(new MCDataFragment());
663 
664   // Update the maximum alignment on the current section if necessary.
665   MCSection *Sec = OS.getCurrentSectionOnly();
666   if (AlignBoundary.value() > Sec->getAlignment())
667     Sec->setAlignment(AlignBoundary);
668 }
669 
670 Optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const {
671   if (STI.getTargetTriple().isOSBinFormatELF()) {
672     unsigned Type;
673     if (STI.getTargetTriple().getArch() == Triple::x86_64) {
674       Type = llvm::StringSwitch<unsigned>(Name)
675 #define ELF_RELOC(X, Y) .Case(#X, Y)
676 #include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
677 #undef ELF_RELOC
678                  .Case("BFD_RELOC_NONE", ELF::R_X86_64_NONE)
679                  .Case("BFD_RELOC_8", ELF::R_X86_64_8)
680                  .Case("BFD_RELOC_16", ELF::R_X86_64_16)
681                  .Case("BFD_RELOC_32", ELF::R_X86_64_32)
682                  .Case("BFD_RELOC_64", ELF::R_X86_64_64)
683                  .Default(-1u);
684     } else {
685       Type = llvm::StringSwitch<unsigned>(Name)
686 #define ELF_RELOC(X, Y) .Case(#X, Y)
687 #include "llvm/BinaryFormat/ELFRelocs/i386.def"
688 #undef ELF_RELOC
689                  .Case("BFD_RELOC_NONE", ELF::R_386_NONE)
690                  .Case("BFD_RELOC_8", ELF::R_386_8)
691                  .Case("BFD_RELOC_16", ELF::R_386_16)
692                  .Case("BFD_RELOC_32", ELF::R_386_32)
693                  .Default(-1u);
694     }
695     if (Type == -1u)
696       return None;
697     return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
698   }
699   return MCAsmBackend::getFixupKind(Name);
700 }
701 
702 const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
703   const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
704       {"reloc_riprel_4byte", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
705       {"reloc_riprel_4byte_movq_load", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
706       {"reloc_riprel_4byte_relax", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
707       {"reloc_riprel_4byte_relax_rex", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
708       {"reloc_signed_4byte", 0, 32, 0},
709       {"reloc_signed_4byte_relax", 0, 32, 0},
710       {"reloc_global_offset_table", 0, 32, 0},
711       {"reloc_global_offset_table8", 0, 64, 0},
712       {"reloc_branch_4byte_pcrel", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
713   };
714 
715   // Fixup kinds from .reloc directive are like R_386_NONE/R_X86_64_NONE. They
716   // do not require any extra processing.
717   if (Kind >= FirstLiteralRelocationKind)
718     return MCAsmBackend::getFixupKindInfo(FK_NONE);
719 
720   if (Kind < FirstTargetFixupKind)
721     return MCAsmBackend::getFixupKindInfo(Kind);
722 
723   assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
724          "Invalid kind!");
725   assert(Infos[Kind - FirstTargetFixupKind].Name && "Empty fixup name!");
726   return Infos[Kind - FirstTargetFixupKind];
727 }
728 
729 bool X86AsmBackend::shouldForceRelocation(const MCAssembler &,
730                                           const MCFixup &Fixup,
731                                           const MCValue &) {
732   return Fixup.getKind() >= FirstLiteralRelocationKind;
733 }
734 
735 static unsigned getFixupKindSize(unsigned Kind) {
736   switch (Kind) {
737   default:
738     llvm_unreachable("invalid fixup kind!");
739   case FK_NONE:
740     return 0;
741   case FK_PCRel_1:
742   case FK_SecRel_1:
743   case FK_Data_1:
744     return 1;
745   case FK_PCRel_2:
746   case FK_SecRel_2:
747   case FK_Data_2:
748     return 2;
749   case FK_PCRel_4:
750   case X86::reloc_riprel_4byte:
751   case X86::reloc_riprel_4byte_relax:
752   case X86::reloc_riprel_4byte_relax_rex:
753   case X86::reloc_riprel_4byte_movq_load:
754   case X86::reloc_signed_4byte:
755   case X86::reloc_signed_4byte_relax:
756   case X86::reloc_global_offset_table:
757   case X86::reloc_branch_4byte_pcrel:
758   case FK_SecRel_4:
759   case FK_Data_4:
760     return 4;
761   case FK_PCRel_8:
762   case FK_SecRel_8:
763   case FK_Data_8:
764   case X86::reloc_global_offset_table8:
765     return 8;
766   }
767 }
768 
769 void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
770                                const MCValue &Target,
771                                MutableArrayRef<char> Data,
772                                uint64_t Value, bool IsResolved,
773                                const MCSubtargetInfo *STI) const {
774   unsigned Kind = Fixup.getKind();
775   if (Kind >= FirstLiteralRelocationKind)
776     return;
777   unsigned Size = getFixupKindSize(Kind);
778 
779   assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");
780 
781   int64_t SignedValue = static_cast<int64_t>(Value);
782   if ((Target.isAbsolute() || IsResolved) &&
783       getFixupKindInfo(Fixup.getKind()).Flags &
784       MCFixupKindInfo::FKF_IsPCRel) {
    // Check that the PC-relative fixup fits into the fixup size.
786     if (Size > 0 && !isIntN(Size * 8, SignedValue))
787       Asm.getContext().reportError(
788                                    Fixup.getLoc(), "value of " + Twine(SignedValue) +
789                                    " is too large for field of " + Twine(Size) +
790                                    ((Size == 1) ? " byte." : " bytes."));
791   } else {
    // Check that the upper bits are either all zeros or all ones.
793     // Specifically ignore overflow/underflow as long as the leakage is
794     // limited to the lower bits. This is to remain compatible with
795     // other assemblers.
796     assert((Size == 0 || isIntN(Size * 8 + 1, SignedValue)) &&
797            "Value does not fit in the Fixup field");
798   }
799 
800   for (unsigned i = 0; i != Size; ++i)
801     Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
802 }
803 
804 bool X86AsmBackend::mayNeedRelaxation(const MCInst &Inst,
805                                       const MCSubtargetInfo &STI) const {
806   // Branches can always be relaxed in either mode.
807   if (getRelaxedOpcodeBranch(Inst, false) != Inst.getOpcode())
808     return true;
809 
810   // Check if this instruction is ever relaxable.
811   if (getRelaxedOpcodeArith(Inst) == Inst.getOpcode())
    return false;

815   // Check if the relaxable operand has an expression. For the current set of
816   // relaxable instructions, the relaxable operand is always the last operand.
817   unsigned RelaxableOp = Inst.getNumOperands() - 1;
818   if (Inst.getOperand(RelaxableOp).isExpr())
819     return true;
820 
821   return false;
822 }
823 
824 bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
825                                          uint64_t Value,
826                                          const MCRelaxableFragment *DF,
827                                          const MCAsmLayout &Layout) const {
828   // Relax if the value is too big for a (signed) i8.
829   return !isInt<8>(Value);
830 }
831 
832 // FIXME: Can tblgen help at all here to verify there aren't other instructions
833 // we can relax?
834 void X86AsmBackend::relaxInstruction(MCInst &Inst,
835                                      const MCSubtargetInfo &STI) const {
  // The only relaxation X86 does is from a 1-byte pcrel to a 4-byte pcrel.
837   bool Is16BitMode = STI.getFeatureBits()[X86::Mode16Bit];
838   unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode);
839 
840   if (RelaxedOp == Inst.getOpcode()) {
841     SmallString<256> Tmp;
842     raw_svector_ostream OS(Tmp);
843     Inst.dump_pretty(OS);
844     OS << "\n";
845     report_fatal_error("unexpected instruction to relax: " + OS.str());
846   }
847 
848   Inst.setOpcode(RelaxedOp);
849 }
850 
/// Return true if this instruction has been fully relaxed into its most
/// general available form.
853 static bool isFullyRelaxed(const MCRelaxableFragment &RF) {
854   auto &Inst = RF.getInst();
855   auto &STI = *RF.getSubtargetInfo();
856   bool Is16BitMode = STI.getFeatureBits()[X86::Mode16Bit];
857   return getRelaxedOpcode(Inst, Is16BitMode) == Inst.getOpcode();
858 }
859 
860 bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
861                                             MCCodeEmitter &Emitter,
862                                             unsigned &RemainingSize) const {
863   if (!RF.getAllowAutoPadding())
864     return false;
865   // If the instruction isn't fully relaxed, shifting it around might require a
  // larger value for one of the fixups than can be encoded.  The outer loop
867   // will also catch this before moving to the next instruction, but we need to
868   // prevent padding this single instruction as well.
869   if (!isFullyRelaxed(RF))
870     return false;
871 
872   const unsigned OldSize = RF.getContents().size();
873   if (OldSize == 15)
874     return false;
875 
876   const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize);
877   const unsigned RemainingPrefixSize = [&]() -> unsigned {
878     SmallString<15> Code;
879     raw_svector_ostream VecOS(Code);
880     Emitter.emitPrefix(RF.getInst(), VecOS, STI);
881     assert(Code.size() < 15 && "The number of prefixes must be less than 15.");
882 
883     // TODO: It turns out we need a decent amount of plumbing for the target
    // specific bits to determine the number of prefixes it's safe to add.  Various
885     // targets (older chips mostly, but also Atom family) encounter decoder
886     // stalls with too many prefixes.  For testing purposes, we set the value
887     // externally for the moment.
888     unsigned ExistingPrefixSize = Code.size();
889     if (TargetPrefixMax <= ExistingPrefixSize)
890       return 0;
891     return TargetPrefixMax - ExistingPrefixSize;
892   }();
893   const unsigned PrefixBytesToAdd =
894       std::min(MaxPossiblePad, RemainingPrefixSize);
895   if (PrefixBytesToAdd == 0)
896     return false;
897 
898   const uint8_t Prefix = determinePaddingPrefix(RF.getInst());
899 
900   SmallString<256> Code;
901   Code.append(PrefixBytesToAdd, Prefix);
902   Code.append(RF.getContents().begin(), RF.getContents().end());
903   RF.getContents() = Code;
904 
905   // Adjust the fixups for the change in offsets
906   for (auto &F : RF.getFixups()) {
907     F.setOffset(F.getOffset() + PrefixBytesToAdd);
908   }
909 
910   RemainingSize -= PrefixBytesToAdd;
911   return true;
912 }
913 
914 bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
915                                                 MCCodeEmitter &Emitter,
916                                                 unsigned &RemainingSize) const {
917   if (isFullyRelaxed(RF))
918     // TODO: There are lots of other tricks we could apply for increasing
919     // encoding size without impacting performance.
920     return false;
921 
922   MCInst Relaxed = RF.getInst();
923   relaxInstruction(Relaxed, *RF.getSubtargetInfo());
924 
925   SmallVector<MCFixup, 4> Fixups;
926   SmallString<15> Code;
927   raw_svector_ostream VecOS(Code);
928   Emitter.encodeInstruction(Relaxed, VecOS, Fixups, *RF.getSubtargetInfo());
929   const unsigned OldSize = RF.getContents().size();
930   const unsigned NewSize = Code.size();
931   assert(NewSize >= OldSize && "size decrease during relaxation?");
932   unsigned Delta = NewSize - OldSize;
933   if (Delta > RemainingSize)
934     return false;
935   RF.setInst(Relaxed);
936   RF.getContents() = Code;
937   RF.getFixups() = Fixups;
938   RemainingSize -= Delta;
939   return true;
940 }
941 
942 bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
943                                            MCCodeEmitter &Emitter,
944                                            unsigned &RemainingSize) const {
945   bool Changed = false;
946   if (RemainingSize != 0)
947     Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize);
948   if (RemainingSize != 0)
949     Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize);
950   return Changed;
951 }
952 
953 void X86AsmBackend::finishLayout(MCAssembler const &Asm,
954                                  MCAsmLayout &Layout) const {
955   // See if we can further relax some instructions to cut down on the number of
956   // nop bytes required for code alignment.  The actual win is in reducing
957   // instruction count, not number of bytes.  Modern X86-64 can easily end up
958   // decode limited.  It is often better to reduce the number of instructions
959   // (i.e. eliminate nops) even at the cost of increasing the size and
960   // complexity of others.
961   if (!X86PadForAlign && !X86PadForBranchAlign)
962     return;
963 
  // The processed regions are delimited by LabeledFragments. -g may have more
965   // MCSymbols and therefore different relaxation results. X86PadForAlign is
966   // disabled by default to eliminate the -g vs non -g difference.
967   DenseSet<MCFragment *> LabeledFragments;
968   for (const MCSymbol &S : Asm.symbols())
969     LabeledFragments.insert(S.getFragment(false));
970 
971   for (MCSection &Sec : Asm) {
972     if (!Sec.getKind().isText())
973       continue;
974 
975     SmallVector<MCRelaxableFragment *, 4> Relaxable;
976     for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) {
977       MCFragment &F = *I;
978 
979       if (LabeledFragments.count(&F))
980         Relaxable.clear();
981 
982       if (F.getKind() == MCFragment::FT_Data ||
983           F.getKind() == MCFragment::FT_CompactEncodedInst)
984         // Skip and ignore
985         continue;
986 
987       if (F.getKind() == MCFragment::FT_Relaxable) {
988         auto &RF = cast<MCRelaxableFragment>(*I);
989         Relaxable.push_back(&RF);
990         continue;
991       }
992 
993       auto canHandle = [](MCFragment &F) -> bool {
994         switch (F.getKind()) {
995         default:
996           return false;
997         case MCFragment::FT_Align:
998           return X86PadForAlign;
999         case MCFragment::FT_BoundaryAlign:
1000           return X86PadForBranchAlign;
1001         }
1002       };
1003       // For any unhandled kind, assume we can't change layout.
1004       if (!canHandle(F)) {
1005         Relaxable.clear();
1006         continue;
1007       }
1008 
1009 #ifndef NDEBUG
1010       const uint64_t OrigOffset = Layout.getFragmentOffset(&F);
1011 #endif
1012       const uint64_t OrigSize = Asm.computeFragmentSize(Layout, F);
1013 
1014       // To keep the effects local, prefer to relax instructions closest to
1015       // the align directive.  This is purely about human understandability
1016       // of the resulting code.  If we later find a reason to expand
1017       // particular instructions over others, we can adjust.
1018       MCFragment *FirstChangedFragment = nullptr;
1019       unsigned RemainingSize = OrigSize;
1020       while (!Relaxable.empty() && RemainingSize != 0) {
1021         auto &RF = *Relaxable.pop_back_val();
1022         // Give the backend a chance to play any tricks it wishes to increase
1023         // the encoding size of the given instruction.  Target independent code
        // will try further relaxation, but targets may play further tricks.
1025         if (padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize))
1026           FirstChangedFragment = &RF;
1027 
1028         // If we have an instruction which hasn't been fully relaxed, we can't
1029         // skip past it and insert bytes before it.  Changing its starting
1030         // offset might require a larger negative offset than it can encode.
1031         // We don't need to worry about larger positive offsets as none of the
1032         // possible offsets between this and our align are visible, and the
1033         // ones afterwards aren't changing.
1034         if (!isFullyRelaxed(RF))
1035           break;
1036       }
1037       Relaxable.clear();
1038 
1039       if (FirstChangedFragment) {
        // Make sure the offsets for any fragments in the affected range get
1041         // updated.  Note that this (conservatively) invalidates the offsets of
1042         // those following, but this is not required.
1043         Layout.invalidateFragmentsFrom(FirstChangedFragment);
1044       }
1045 
      // BoundaryAlign explicitly tracks its size (unlike align)
1047       if (F.getKind() == MCFragment::FT_BoundaryAlign)
1048         cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize);
1049 
1050 #ifndef NDEBUG
1051       const uint64_t FinalOffset = Layout.getFragmentOffset(&F);
1052       const uint64_t FinalSize = Asm.computeFragmentSize(Layout, F);
1053       assert(OrigOffset + OrigSize == FinalOffset + FinalSize &&
1054              "can't move start of next fragment!");
1055       assert(FinalSize == RemainingSize && "inconsistent size computation?");
1056 #endif
1057 
1058       // If we're looking at a boundary align, make sure we don't try to pad
1059       // its target instructions for some following directive.  Doing so would
1060       // break the alignment of the current boundary align.
1061       if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(&F)) {
1062         const MCFragment *LastFragment = BF->getLastFragment();
1063         if (!LastFragment)
1064           continue;
1065         while (&*I != LastFragment)
1066           ++I;
1067       }
1068     }
1069   }
1070 
1071   // The layout is done. Mark every fragment as valid.
1072   for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
1073     MCSection &Section = *Layout.getSectionOrder()[i];
1074     Layout.getFragmentOffset(&*Section.getFragmentList().rbegin());
1075     Asm.computeFragmentSize(Layout, *Section.getFragmentList().rbegin());
1076   }
1077 }
1078 
1079 unsigned X86AsmBackend::getMaximumNopSize(const MCSubtargetInfo &STI) const {
1080   if (STI.hasFeature(X86::Mode16Bit))
1081     return 4;
1082   if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Mode64Bit))
1083     return 1;
1084   if (STI.getFeatureBits()[X86::TuningFast7ByteNOP])
1085     return 7;
1086   if (STI.getFeatureBits()[X86::TuningFast15ByteNOP])
1087     return 15;
1088   if (STI.getFeatureBits()[X86::TuningFast11ByteNOP])
1089     return 11;
1090   // FIXME: handle 32-bit mode
  // 15 bytes is the longest single NOP instruction, but 10 bytes is
  // commonly the longest that can be efficiently decoded.
1093   return 10;
1094 }
1095 
1096 /// Write a sequence of optimal nops to the output, covering \p Count
1097 /// bytes.
1098 /// \return - true on success, false on failure
1099 bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
1100                                  const MCSubtargetInfo *STI) const {
1101   static const char Nops32Bit[10][11] = {
1102       // nop
1103       "\x90",
1104       // xchg %ax,%ax
1105       "\x66\x90",
1106       // nopl (%[re]ax)
1107       "\x0f\x1f\x00",
1108       // nopl 0(%[re]ax)
1109       "\x0f\x1f\x40\x00",
1110       // nopl 0(%[re]ax,%[re]ax,1)
1111       "\x0f\x1f\x44\x00\x00",
1112       // nopw 0(%[re]ax,%[re]ax,1)
1113       "\x66\x0f\x1f\x44\x00\x00",
1114       // nopl 0L(%[re]ax)
1115       "\x0f\x1f\x80\x00\x00\x00\x00",
1116       // nopl 0L(%[re]ax,%[re]ax,1)
1117       "\x0f\x1f\x84\x00\x00\x00\x00\x00",
1118       // nopw 0L(%[re]ax,%[re]ax,1)
1119       "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00",
1120       // nopw %cs:0L(%[re]ax,%[re]ax,1)
1121       "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00",
1122   };
1123 
1124   // 16-bit mode uses different nop patterns than 32-bit.
1125   static const char Nops16Bit[4][11] = {
1126       // nop
1127       "\x90",
1128       // xchg %eax,%eax
1129       "\x66\x90",
1130       // lea 0(%si),%si
1131       "\x8d\x74\x00",
1132       // lea 0w(%si),%si
1133       "\x8d\xb4\x00\x00",
1134   };
1135 
1136   const char(*Nops)[11] =
1137       STI->getFeatureBits()[X86::Mode16Bit] ? Nops16Bit : Nops32Bit;
1138 
1139   uint64_t MaxNopLength = (uint64_t)getMaximumNopSize(*STI);
1140 
1141   // Emit as many MaxNopLength NOPs as needed, then emit a NOP of the remaining
1142   // length.
1143   do {
1144     const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength);
1145     const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
1146     for (uint8_t i = 0; i < Prefixes; i++)
1147       OS << '\x66';
1148     const uint8_t Rest = ThisNopLength - Prefixes;
1149     if (Rest != 0)
1150       OS.write(Nops[Rest - 1], Rest);
1151     Count -= ThisNopLength;
1152   } while (Count != 0);
1153 
1154   return true;
1155 }
1156 
1157 /* *** */
1158 
1159 namespace {
1160 
1161 class ELFX86AsmBackend : public X86AsmBackend {
1162 public:
1163   uint8_t OSABI;
1164   ELFX86AsmBackend(const Target &T, uint8_t OSABI, const MCSubtargetInfo &STI)
1165       : X86AsmBackend(T, STI), OSABI(OSABI) {}
1166 };
1167 
1168 class ELFX86_32AsmBackend : public ELFX86AsmBackend {
1169 public:
1170   ELFX86_32AsmBackend(const Target &T, uint8_t OSABI,
1171                       const MCSubtargetInfo &STI)
1172     : ELFX86AsmBackend(T, OSABI, STI) {}
1173 
1174   std::unique_ptr<MCObjectTargetWriter>
1175   createObjectTargetWriter() const override {
1176     return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, ELF::EM_386);
1177   }
1178 };
1179 
1180 class ELFX86_X32AsmBackend : public ELFX86AsmBackend {
1181 public:
1182   ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI,
1183                        const MCSubtargetInfo &STI)
1184       : ELFX86AsmBackend(T, OSABI, STI) {}
1185 
1186   std::unique_ptr<MCObjectTargetWriter>
1187   createObjectTargetWriter() const override {
1188     return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
1189                                     ELF::EM_X86_64);
1190   }
1191 };
1192 
1193 class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend {
1194 public:
1195   ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI,
1196                          const MCSubtargetInfo &STI)
1197       : ELFX86AsmBackend(T, OSABI, STI) {}
1198 
1199   std::unique_ptr<MCObjectTargetWriter>
1200   createObjectTargetWriter() const override {
1201     return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
1202                                     ELF::EM_IAMCU);
1203   }
1204 };
1205 
1206 class ELFX86_64AsmBackend : public ELFX86AsmBackend {
1207 public:
1208   ELFX86_64AsmBackend(const Target &T, uint8_t OSABI,
1209                       const MCSubtargetInfo &STI)
1210     : ELFX86AsmBackend(T, OSABI, STI) {}
1211 
1212   std::unique_ptr<MCObjectTargetWriter>
1213   createObjectTargetWriter() const override {
1214     return createX86ELFObjectWriter(/*IsELF64*/ true, OSABI, ELF::EM_X86_64);
1215   }
1216 };
1217 
1218 class WindowsX86AsmBackend : public X86AsmBackend {
1219   bool Is64Bit;
1220 
1221 public:
1222   WindowsX86AsmBackend(const Target &T, bool is64Bit,
1223                        const MCSubtargetInfo &STI)
1224     : X86AsmBackend(T, STI)
1225     , Is64Bit(is64Bit) {
1226   }
1227 
1228   Optional<MCFixupKind> getFixupKind(StringRef Name) const override {
1229     return StringSwitch<Optional<MCFixupKind>>(Name)
1230         .Case("dir32", FK_Data_4)
1231         .Case("secrel32", FK_SecRel_4)
1232         .Case("secidx", FK_SecRel_2)
1233         .Default(MCAsmBackend::getFixupKind(Name));
1234   }
1235 
1236   std::unique_ptr<MCObjectTargetWriter>
1237   createObjectTargetWriter() const override {
1238     return createX86WinCOFFObjectWriter(Is64Bit);
1239   }
1240 };
1241 
1242 namespace CU {
1243 
1244   /// Compact unwind encoding values.
1245   enum CompactUnwindEncodings {
    /// [RE]BP based frame where [RE]BP is pushed on the stack immediately after
1247     /// the return address, then [RE]SP is moved to [RE]BP.
1248     UNWIND_MODE_BP_FRAME                   = 0x01000000,
1249 
1250     /// A frameless function with a small constant stack size.
1251     UNWIND_MODE_STACK_IMMD                 = 0x02000000,
1252 
1253     /// A frameless function with a large constant stack size.
1254     UNWIND_MODE_STACK_IND                  = 0x03000000,
1255 
1256     /// No compact unwind encoding is available.
1257     UNWIND_MODE_DWARF                      = 0x04000000,
1258 
1259     /// Mask for encoding the frame registers.
1260     UNWIND_BP_FRAME_REGISTERS              = 0x00007FFF,
1261 
1262     /// Mask for encoding the frameless registers.
1263     UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
1264   };
1265 
1266 } // namespace CU
1267 
1268 class DarwinX86AsmBackend : public X86AsmBackend {
1269   const MCRegisterInfo &MRI;
1270 
1271   /// Number of registers that can be saved in a compact unwind encoding.
1272   enum { CU_NUM_SAVED_REGS = 6 };
1273 
1274   mutable unsigned SavedRegs[CU_NUM_SAVED_REGS];
1275   Triple TT;
1276   bool Is64Bit;
1277 
1278   unsigned OffsetSize;                   ///< Offset of a "push" instruction.
1279   unsigned MoveInstrSize;                ///< Size of a "move" instruction.
1280   unsigned StackDivide;                  ///< Amount to adjust stack size by.
1281 protected:
1282   /// Size of a "push" instruction for the given register.
1283   unsigned PushInstrSize(unsigned Reg) const {
1284     switch (Reg) {
1285       case X86::EBX:
1286       case X86::ECX:
1287       case X86::EDX:
1288       case X86::EDI:
1289       case X86::ESI:
1290       case X86::EBP:
1291       case X86::RBX:
1292       case X86::RBP:
1293         return 1;
1294       case X86::R12:
1295       case X86::R13:
1296       case X86::R14:
1297       case X86::R15:
1298         return 2;
1299     }
1300     return 1;
1301   }
1302 
1303 private:
1304   /// Get the compact unwind number for a given register. The number
1305   /// corresponds to the enum lists in compact_unwind_encoding.h.
1306   int getCompactUnwindRegNum(unsigned Reg) const {
1307     static const MCPhysReg CU32BitRegs[7] = {
1308       X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
1309     };
1310     static const MCPhysReg CU64BitRegs[] = {
1311       X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
1312     };
1313     const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
1314     for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
1315       if (*CURegs == Reg)
1316         return Idx;
1317 
1318     return -1;
1319   }
1320 
1321   /// Return the registers encoded for a compact encoding with a frame
1322   /// pointer.
1323   uint32_t encodeCompactUnwindRegistersWithFrame() const {
1324     // Encode the registers in the order they were saved --- 3-bits per
1325     // register. The list of saved registers is assumed to be in reverse
1326     // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS.
1327     uint32_t RegEnc = 0;
1328     for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) {
1329       unsigned Reg = SavedRegs[i];
1330       if (Reg == 0) break;
1331 
1332       int CURegNum = getCompactUnwindRegNum(Reg);
1333       if (CURegNum == -1) return ~0U;
1334 
1335       // Encode the 3-bit register number in order, skipping over 3-bits for
1336       // each register.
1337       RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
1338     }
1339 
1340     assert((RegEnc & 0x3FFFF) == RegEnc &&
1341            "Invalid compact register encoding!");
1342     return RegEnc;
1343   }
1344 
1345   /// Create the permutation encoding used with frameless stacks. It is
1346   /// passed the number of registers to be saved and an array of the registers
1347   /// saved.
1348   uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const {
1349     // The saved registers are numbered from 1 to 6. In order to encode the
1350     // order in which they were saved, we re-number them according to their
1351     // place in the register order. The re-numbering is relative to the last
1352     // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in
1353     // that order:
1354     //
1355     //    Orig  Re-Num
1356     //    ----  ------
1357     //     6       6
1358     //     2       2
1359     //     4       3
1360     //     5       3
1361     //
1362     for (unsigned i = 0; i < RegCount; ++i) {
1363       int CUReg = getCompactUnwindRegNum(SavedRegs[i]);
1364       if (CUReg == -1) return ~0U;
1365       SavedRegs[i] = CUReg;
1366     }
1367 
1368     // Reverse the list.
1369     std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]);
1370 
1371     uint32_t RenumRegs[CU_NUM_SAVED_REGS];
1372     for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i){
1373       unsigned Countless = 0;
1374       for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
1375         if (SavedRegs[j] < SavedRegs[i])
1376           ++Countless;
1377 
1378       RenumRegs[i] = SavedRegs[i] - Countless - 1;
1379     }
1380 
1381     // Take the renumbered values and encode them into a 10-bit number.
1382     uint32_t permutationEncoding = 0;
1383     switch (RegCount) {
1384     case 6:
1385       permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
1386                              + 6 * RenumRegs[2] +  2 * RenumRegs[3]
1387                              +     RenumRegs[4];
1388       break;
1389     case 5:
1390       permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
1391                              + 6 * RenumRegs[3] +  2 * RenumRegs[4]
1392                              +     RenumRegs[5];
1393       break;
1394     case 4:
1395       permutationEncoding |=  60 * RenumRegs[2] + 12 * RenumRegs[3]
1396                              + 3 * RenumRegs[4] +      RenumRegs[5];
1397       break;
1398     case 3:
1399       permutationEncoding |=  20 * RenumRegs[3] +  4 * RenumRegs[4]
1400                              +     RenumRegs[5];
1401       break;
1402     case 2:
1403       permutationEncoding |=   5 * RenumRegs[4] +      RenumRegs[5];
1404       break;
1405     case 1:
1406       permutationEncoding |=       RenumRegs[5];
1407       break;
1408     }
1409 
1410     assert((permutationEncoding & 0x3FF) == permutationEncoding &&
1411            "Invalid compact register encoding!");
1412     return permutationEncoding;
1413   }
1414 
1415 public:
1416   DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI,
1417                       const MCSubtargetInfo &STI)
1418       : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()),
1419         Is64Bit(TT.isArch64Bit()) {
1420     memset(SavedRegs, 0, sizeof(SavedRegs));
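    // OffsetSize is the stack-slot size of a pushed GPR, MoveInstrSize the
    // byte length of 'movq %rsp, %rbp' (48 89 E5) vs. 'movl %esp, %ebp'
    // (89 E5), and StackDivide the granularity in which the compact unwind
    // format expresses stack sizes and adjustments below.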
1421     OffsetSize = Is64Bit ? 8 : 4;
1422     MoveInstrSize = Is64Bit ? 3 : 2;
1423     StackDivide = Is64Bit ? 8 : 4;
1424   }
1425 
1426   std::unique_ptr<MCObjectTargetWriter>
1427   createObjectTargetWriter() const override {
1428     uint32_t CPUType = cantFail(MachO::getCPUType(TT));
1429     uint32_t CPUSubType = cantFail(MachO::getCPUSubType(TT));
1430     return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubType);
1431   }
1432 
1433   /// Implementation of the algorithm to generate the compact unwind encoding
1434   /// for the CFI instructions.
1435   uint32_t generateCompactUnwindEncoding(
1436       ArrayRef<MCCFIInstruction> Instrs) const override {
1437     if (Instrs.empty()) return 0;
1438 
1439     // Reset the saved registers.
1440     unsigned SavedRegIdx = 0;
1441     memset(SavedRegs, 0, sizeof(SavedRegs));
1442 
1443     bool HasFP = false;
1444 
1445     // The compact unwind encoding word that the logic below builds up.
1446     uint32_t CompactUnwindEncoding = 0;
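    // The final word is one of the CU::UNWIND_MODE_* values plus mode-specific
    // fields: as the shifts and masks below show, bits 23:16 hold the BP-frame
    // stack adjust, the small frameless stack size, or the sub-instruction
    // offset; bits 15:13 the extra frameless stack adjust; bits 12:10 the
    // frameless register count; and the low bits the saved-register encoding
    // or permutation.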
1447 
1448     unsigned SubtractInstrIdx = Is64Bit ? 3 : 2;
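    // 3 vs. 2 is the number of opcode/ModRM bytes preceding the immediate in
    // 'subq $imm32, %rsp' (48 81 EC) vs. 'subl $imm32, %esp' (81 EC); the
    // frameless-indirect case below adds the size of any earlier pushes so
    // that this becomes the offset of the immediate within the function.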
1449     unsigned InstrOffset = 0;
1450     unsigned StackAdjust = 0;
1451     unsigned StackSize = 0;
1452     unsigned NumDefCFAOffsets = 0;
1453     int MinAbsOffset = std::numeric_limits<int>::max();
1454 
1455     for (const MCCFIInstruction &Inst : Instrs) {
1456       switch (Inst.getOperation()) {
1457       default:
1458         // Any other CFI directives indicate a frame that we aren't prepared
1459         // to represent via compact unwind, so just bail out.
1460         return CU::UNWIND_MODE_DWARF;
1461       case MCCFIInstruction::OpDefCfaRegister: {
1462         // Defines a frame pointer. E.g.
1463         //
1464         //     movq %rsp, %rbp
1465         //  L0:
1466         //     .cfi_def_cfa_register %rbp
1467         //
1468         HasFP = true;
1469 
1470         // If the frame pointer is anything other than ebp/rbp, we cannot
1471         // generate a compact unwinding representation, so bail out.
1472         if (*MRI.getLLVMRegNum(Inst.getRegister(), true) !=
1473             (Is64Bit ? X86::RBP : X86::EBP))
1474           return CU::UNWIND_MODE_DWARF;
1475 
1476         // Reset the counts.
1477         memset(SavedRegs, 0, sizeof(SavedRegs));
1478         StackAdjust = 0;
1479         SavedRegIdx = 0;
1480         MinAbsOffset = std::numeric_limits<int>::max();
1481         InstrOffset += MoveInstrSize;
1482         break;
1483       }
1484       case MCCFIInstruction::OpDefCfaOffset: {
1485         // Defines a new offset for the CFA. E.g.
1486         //
1487         //  With frame:
1488         //
1489         //     pushq %rbp
1490         //  L0:
1491         //     .cfi_def_cfa_offset 16
1492         //
1493         //  Without frame:
1494         //
1495         //     subq $72, %rsp
1496         //  L0:
1497         //     .cfi_def_cfa_offset 80
1498         //
1499         StackSize = Inst.getOffset() / StackDivide;
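        // E.g., the ".cfi_def_cfa_offset 80" example above gives StackSize ==
        // 10 on x86-64 (80 divided by StackDivide == 8).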
1500         ++NumDefCFAOffsets;
1501         break;
1502       }
1503       case MCCFIInstruction::OpOffset: {
1504         // Defines a "push" of a callee-saved register. E.g.
1505         //
1506         //     pushq %r15
1507         //     pushq %r14
1508         //     pushq %rbx
1509         //  L0:
1510         //     subq $120, %rsp
1511         //  L1:
1512         //     .cfi_offset %rbx, -40
1513         //     .cfi_offset %r14, -32
1514         //     .cfi_offset %r15, -24
1515         //
1516         if (SavedRegIdx == CU_NUM_SAVED_REGS)
1517           // If there are too many saved registers, we cannot use a compact
1518           // unwind encoding.
1519           return CU::UNWIND_MODE_DWARF;
1520 
1521         unsigned Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true);
1522         SavedRegs[SavedRegIdx++] = Reg;
1523         StackAdjust += OffsetSize;
1524         MinAbsOffset = std::min(MinAbsOffset, abs(Inst.getOffset()));
1525         InstrOffset += PushInstrSize(Reg);
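        // Pushes of %r8-%r15 carry a REX prefix and are two bytes instead of
        // one, which is presumably what PushInstrSize accounts for, so
        // InstrOffset remains an exact byte offset into the prologue.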
1526         break;
1527       }
1528       }
1529     }
1530 
1531     StackAdjust /= StackDivide;
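    // OffsetSize and StackDivide are equal (8/8 on x86-64, 4/4 on i386), so
    // StackAdjust is now simply the number of callee-saved pushes.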
1532 
1533     if (HasFP) {
1534       if ((StackAdjust & 0xFF) != StackAdjust)
1535         // Offset was too big for a compact unwind encoding.
1536         return CU::UNWIND_MODE_DWARF;
1537 
1538       // We don't attempt to track a real StackAdjust, so if the saved registers
1539       // aren't adjacent to rbp we can't cope.
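      // (The return address sits at CFA - OffsetSize and the saved [RE]BP at
      // CFA - 2 * OffsetSize, so the first callee-saved push lands at
      // CFA - 3 * OffsetSize; anything else implies a gap we cannot express.)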
1540       if (SavedRegIdx != 0 && MinAbsOffset != 3 * (int)OffsetSize)
1541         return CU::UNWIND_MODE_DWARF;
1542 
1543       // Get the encoding of the saved registers when we have a frame pointer.
1544       uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame();
1545       if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
1546 
1547       CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME;
1548       CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
1549       CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
1550     } else {
1551       SubtractInstrIdx += InstrOffset;
1552       ++StackAdjust;
1553 
1554       if ((StackSize & 0xFF) == StackSize) {
1555         // Frameless stack with a small stack size.
1556         CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD;
1557 
1558         // Encode the stack size.
1559         CompactUnwindEncoding |= (StackSize & 0xFF) << 16;
1560       } else {
1561         if ((StackAdjust & 0x7) != StackAdjust)
1562           // The extra stack adjustments are too big for us to handle.
1563           return CU::UNWIND_MODE_DWARF;
1564 
1565         // Frameless stack with an offset too large for us to encode compactly.
1566         CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND;
1567 
1568         // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
1569         // instruction.
1570         CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;
1571 
1572         // Encode any extra stack adjustments (done via push instructions).
1573         CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
1574       }
1575 
1576       // Encode the number of registers saved. (Reverse the list first.)
1577       std::reverse(&SavedRegs[0], &SavedRegs[SavedRegIdx]);
1578       CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;
1579 
1580       // Get the encoding of the saved registers when we don't have a frame
1581       // pointer.
1582       uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegIdx);
1583       if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
1584 
1585       // Encode the register encoding.
1586       CompactUnwindEncoding |=
1587         RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION;
1588     }
1589 
1590     return CompactUnwindEncoding;
1591   }
1592 };
1593 
1594 } // end anonymous namespace
1595 
1596 MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
1597                                            const MCSubtargetInfo &STI,
1598                                            const MCRegisterInfo &MRI,
1599                                            const MCTargetOptions &Options) {
1600   const Triple &TheTriple = STI.getTargetTriple();
1601   if (TheTriple.isOSBinFormatMachO())
1602     return new DarwinX86AsmBackend(T, MRI, STI);
1603 
1604   if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
1605     return new WindowsX86AsmBackend(T, false, STI);
1606 
1607   uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
1608 
1609   if (TheTriple.isOSIAMCU())
1610     return new ELFX86_IAMCUAsmBackend(T, OSABI, STI);
1611 
1612   return new ELFX86_32AsmBackend(T, OSABI, STI);
1613 }
1614 
1615 MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
1616                                            const MCSubtargetInfo &STI,
1617                                            const MCRegisterInfo &MRI,
1618                                            const MCTargetOptions &Options) {
1619   const Triple &TheTriple = STI.getTargetTriple();
1620   if (TheTriple.isOSBinFormatMachO())
1621     return new DarwinX86AsmBackend(T, MRI, STI);
1622 
1623   if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
1624     return new WindowsX86AsmBackend(T, true, STI);
1625 
1626   uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
1627 
1628   if (TheTriple.isX32())
1629     return new ELFX86_X32AsmBackend(T, OSABI, STI);
1630   return new ELFX86_64AsmBackend(T, OSABI, STI);
1631 }
1632