//===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86FixupKinds.h"
#include "MCTargetDesc/X86InstrRelaxTables.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

namespace {
/// A wrapper for holding a mask of the values from X86::AlignBranchBoundaryKind
class X86AlignBranchKind {
private:
  uint8_t AlignBranchKind = 0;

public:
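  // Parse a '+'-separated list of branch kind names, e.g. "fused+jcc+jmp".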
  void operator=(const std::string &Val) {
    if (Val.empty())
      return;
    SmallVector<StringRef, 6> BranchTypes;
    StringRef(Val).split(BranchTypes, '+', -1, false);
    for (auto BranchType : BranchTypes) {
      if (BranchType == "fused")
        addKind(X86::AlignBranchFused);
      else if (BranchType == "jcc")
        addKind(X86::AlignBranchJcc);
      else if (BranchType == "jmp")
        addKind(X86::AlignBranchJmp);
      else if (BranchType == "call")
        addKind(X86::AlignBranchCall);
      else if (BranchType == "ret")
        addKind(X86::AlignBranchRet);
      else if (BranchType == "indirect")
        addKind(X86::AlignBranchIndirect);
      else {
        errs() << "invalid argument " << BranchType.str()
               << " to -x86-align-branch=; each element must be one of: fused, "
                  "jcc, jmp, call, ret, indirect (plus separated).\n";
      }
    }
  }

  operator uint8_t() const { return AlignBranchKind; }
  void addKind(X86::AlignBranchBoundaryKind Value) { AlignBranchKind |= Value; }
};

X86AlignBranchKind X86AlignBranchKindLoc;

cl::opt<unsigned> X86AlignBranchBoundary(
    "x86-align-branch-boundary", cl::init(0),
    cl::desc(
        "Control how the assembler should align branches with NOP. If the "
        "boundary's size is not 0, it should be a power of 2 and no less "
        "than 32. Branches will be aligned so that they neither cross nor "
        "end against a boundary of the specified size. The default value 0 "
        "does not align branches."));

cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch(
    "x86-align-branch",
    cl::desc(
        "Specify types of branches to align (plus separated list of types):"
             "\njcc      indicates conditional jumps"
             "\nfused    indicates fused conditional jumps"
             "\njmp      indicates direct unconditional jumps"
             "\ncall     indicates direct and indirect calls"
             "\nret      indicates rets"
             "\nindirect indicates indirect unconditional jumps"),
    cl::location(X86AlignBranchKindLoc));
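
// For example, a plausible invocation that pads so the selected branch types
// neither cross nor end at a 32-byte boundary would be:
//   llc -x86-align-branch-boundary=32 -x86-align-branch=fused+jcc+jmp ...
// (Sketch only; the same defaults can also be requested with the single
// -x86-branches-within-32B-boundaries flag declared below.)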

cl::opt<bool> X86AlignBranchWithin32BBoundaries(
    "x86-branches-within-32B-boundaries", cl::init(false),
    cl::desc(
        "Align selected instructions to mitigate negative performance impact "
        "of Intel's microcode update for erratum SKX102.  May break "
        "assumptions about labels corresponding to particular instructions, "
        "and should be used with caution."));

cl::opt<unsigned> X86PadMaxPrefixSize(
    "x86-pad-max-prefix-size", cl::init(0),
    cl::desc("Maximum number of prefixes to use for padding"));

cl::opt<bool> X86PadForAlign(
    "x86-pad-for-align", cl::init(false), cl::Hidden,
    cl::desc("Pad previous instructions to implement align directives"));

cl::opt<bool> X86PadForBranchAlign(
    "x86-pad-for-branch-align", cl::init(true), cl::Hidden,
    cl::desc("Pad previous instructions to implement branch alignment"));

class X86AsmBackend : public MCAsmBackend {
  const MCSubtargetInfo &STI;
  std::unique_ptr<const MCInstrInfo> MCII;
  X86AlignBranchKind AlignBranchType;
  Align AlignBoundary;
  unsigned TargetPrefixMax = 0;

  MCInst PrevInst;
  MCBoundaryAlignFragment *PendingBA = nullptr;
  std::pair<MCFragment *, size_t> PrevInstPosition;
  bool CanPadInst;

  uint8_t determinePaddingPrefix(const MCInst &Inst) const;
  bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
  bool needAlign(const MCInst &Inst) const;
  bool canPadBranches(MCObjectStreamer &OS) const;
  bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const;

public:
  X86AsmBackend(const Target &T, const MCSubtargetInfo &STI)
      : MCAsmBackend(support::little), STI(STI),
        MCII(T.createMCInstrInfo()) {
    if (X86AlignBranchWithin32BBoundaries) {
      // At the moment, this defaults to aligning fused branches, unconditional
      // jumps, and (unfused) conditional jumps with nops.  Both the
      // instructions aligned and the alignment method (nop vs prefix) may
      // change in the future.
      AlignBoundary = assumeAligned(32);
      AlignBranchType.addKind(X86::AlignBranchFused);
      AlignBranchType.addKind(X86::AlignBranchJcc);
      AlignBranchType.addKind(X86::AlignBranchJmp);
    }
    // Allow overriding the defaults set by the main flag.
    if (X86AlignBranchBoundary.getNumOccurrences())
      AlignBoundary = assumeAligned(X86AlignBranchBoundary);
    if (X86AlignBranch.getNumOccurrences())
      AlignBranchType = X86AlignBranchKindLoc;
    if (X86PadMaxPrefixSize.getNumOccurrences())
      TargetPrefixMax = X86PadMaxPrefixSize;
  }

  bool allowAutoPadding() const override;
  bool allowEnhancedRelaxation() const override;
  void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst,
                            const MCSubtargetInfo &STI) override;
  void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) override;

  unsigned getNumFixupKinds() const override {
    return X86::NumTargetFixupKinds;
  }

  std::optional<MCFixupKind> getFixupKind(StringRef Name) const override;

  const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;

  bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
                             const MCValue &Target) override;

  void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
                  const MCValue &Target, MutableArrayRef<char> Data,
                  uint64_t Value, bool IsResolved,
                  const MCSubtargetInfo *STI) const override;

  bool mayNeedRelaxation(const MCInst &Inst,
                         const MCSubtargetInfo &STI) const override;

  bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
                            const MCRelaxableFragment *DF,
                            const MCAsmLayout &Layout) const override;

  void relaxInstruction(MCInst &Inst,
                        const MCSubtargetInfo &STI) const override;

  bool padInstructionViaRelaxation(MCRelaxableFragment &RF,
                                   MCCodeEmitter &Emitter,
                                   unsigned &RemainingSize) const;

  bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
                               unsigned &RemainingSize) const;

  bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
                              unsigned &RemainingSize) const;

  void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const override;

  unsigned getMaximumNopSize(const MCSubtargetInfo &STI) const override;

  bool writeNopData(raw_ostream &OS, uint64_t Count,
                    const MCSubtargetInfo *STI) const override;
};
} // end anonymous namespace

static unsigned getRelaxedOpcodeBranch(const MCInst &Inst, bool Is16BitMode) {
  unsigned Op = Inst.getOpcode();
  switch (Op) {
  default:
    return Op;
  case X86::JCC_1:
    return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4;
  case X86::JMP_1:
    return (Is16BitMode) ? X86::JMP_2 : X86::JMP_4;
  }
}

static unsigned getRelaxedOpcodeArith(const MCInst &Inst) {
  unsigned Op = Inst.getOpcode();
  return X86::getRelaxedOpcodeArith(Op);
}

static unsigned getRelaxedOpcode(const MCInst &Inst, bool Is16BitMode) {
  unsigned R = getRelaxedOpcodeArith(Inst);
  if (R != Inst.getOpcode())
    return R;
  return getRelaxedOpcodeBranch(Inst, Is16BitMode);
}
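
// For example, relaxation rewrites JCC_1 (a jcc with a rel8 immediate) into
// JCC_4 (rel32) when the target turns out to be out of rel8 range, and
// likewise JMP_1 -> JMP_4 (or the rel16 forms in 16-bit mode); arithmetic
// instructions are handled by the X86::getRelaxedOpcodeArith mapping above.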

static X86::CondCode getCondFromBranch(const MCInst &MI,
                                       const MCInstrInfo &MCII) {
  unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  default:
    return X86::COND_INVALID;
  case X86::JCC_1: {
    const MCInstrDesc &Desc = MCII.get(Opcode);
    return static_cast<X86::CondCode>(
        MI.getOperand(Desc.getNumOperands() - 1).getImm());
  }
  }
}

static X86::SecondMacroFusionInstKind
classifySecondInstInMacroFusion(const MCInst &MI, const MCInstrInfo &MCII) {
  X86::CondCode CC = getCondFromBranch(MI, MCII);
  return classifySecondCondCodeInMacroFusion(CC);
}

/// Check if the instruction uses RIP relative addressing.
static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) {
  unsigned Opcode = MI.getOpcode();
  const MCInstrDesc &Desc = MCII.get(Opcode);
  uint64_t TSFlags = Desc.TSFlags;
  unsigned CurOp = X86II::getOperandBias(Desc);
  int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
  if (MemoryOperand < 0)
    return false;
  unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg;
  unsigned BaseReg = MI.getOperand(BaseRegNum).getReg();
  return (BaseReg == X86::RIP);
}

/// Check if the instruction is a prefix.
static bool isPrefix(const MCInst &MI, const MCInstrInfo &MCII) {
  return X86II::isPrefix(MCII.get(MI.getOpcode()).TSFlags);
}

/// Check if the instruction is valid as the first instruction in macro fusion.
static bool isFirstMacroFusibleInst(const MCInst &Inst,
                                    const MCInstrInfo &MCII) {
  // An Intel instruction with RIP relative addressing is not macro fusible.
  if (isRIPRelative(Inst, MCII))
    return false;
  X86::FirstMacroFusionInstKind FIK =
      X86::classifyFirstOpcodeInMacroFusion(Inst.getOpcode());
  return FIK != X86::FirstMacroFusionInstKind::Invalid;
}

/// X86 can reduce the number of NOP bytes needed for alignment by padding
/// instructions with prefixes instead, which gives better performance in some
/// cases. Here, we determine which prefix is the most suitable.
///
/// If the instruction has a segment override prefix, use the existing one.
/// If the target is 64-bit, use CS.
/// If the target is 32-bit,
///   - If the instruction has an ESP/EBP base register, use SS.
///   - Otherwise use DS.
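///
/// For example, in 64-bit mode an instruction with no explicit segment
/// override gets CS prefixes (0x2E); in 32-bit mode a memory operand based on
/// ESP/EBP gets SS prefixes (0x36), since SS is already the implied segment
/// for those bases.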
uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const {
  assert((STI.hasFeature(X86::Is32Bit) || STI.hasFeature(X86::Is64Bit)) &&
         "Prefixes can be added only in 32-bit or 64-bit mode.");
  const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
  uint64_t TSFlags = Desc.TSFlags;

  // Determine where the memory operand starts, if present.
  int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
  if (MemoryOperand != -1)
    MemoryOperand += X86II::getOperandBias(Desc);

  unsigned SegmentReg = 0;
  if (MemoryOperand >= 0) {
    // Check for explicit segment override on memory operand.
    SegmentReg = Inst.getOperand(MemoryOperand + X86::AddrSegmentReg).getReg();
  }

  switch (TSFlags & X86II::FormMask) {
  default:
    break;
  case X86II::RawFrmDstSrc: {
    // Check segment override opcode prefix as needed (not for %ds).
    if (Inst.getOperand(2).getReg() != X86::DS)
      SegmentReg = Inst.getOperand(2).getReg();
    break;
  }
  case X86II::RawFrmSrc: {
    // Check segment override opcode prefix as needed (not for %ds).
    if (Inst.getOperand(1).getReg() != X86::DS)
      SegmentReg = Inst.getOperand(1).getReg();
    break;
  }
  case X86II::RawFrmMemOffs: {
    // Check segment override opcode prefix as needed.
    SegmentReg = Inst.getOperand(1).getReg();
    break;
  }
  }

  if (SegmentReg != 0)
    return X86::getSegmentOverridePrefixForReg(SegmentReg);

  if (STI.hasFeature(X86::Is64Bit))
    return X86::CS_Encoding;

  if (MemoryOperand >= 0) {
    unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg;
    unsigned BaseReg = Inst.getOperand(BaseRegNum).getReg();
    if (BaseReg == X86::ESP || BaseReg == X86::EBP)
      return X86::SS_Encoding;
  }
  return X86::DS_Encoding;
}

/// Check if the two instructions will be macro-fused on the target cpu.
bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const {
  const MCInstrDesc &InstDesc = MCII->get(Jcc.getOpcode());
  if (!InstDesc.isConditionalBranch())
    return false;
  if (!isFirstMacroFusibleInst(Cmp, *MCII))
    return false;
  const X86::FirstMacroFusionInstKind CmpKind =
      X86::classifyFirstOpcodeInMacroFusion(Cmp.getOpcode());
  const X86::SecondMacroFusionInstKind BranchKind =
      classifySecondInstInMacroFusion(Jcc, *MCII);
  return X86::isMacroFused(CmpKind, BranchKind);
}

/// Check if the instruction has a variant symbol operand.
static bool hasVariantSymbol(const MCInst &MI) {
  for (auto &Operand : MI) {
    if (!Operand.isExpr())
      continue;
    const MCExpr &Expr = *Operand.getExpr();
    if (Expr.getKind() == MCExpr::SymbolRef &&
        cast<MCSymbolRefExpr>(Expr).getKind() != MCSymbolRefExpr::VK_None)
      return true;
  }
  return false;
}

bool X86AsmBackend::allowAutoPadding() const {
  return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone);
}

bool X86AsmBackend::allowEnhancedRelaxation() const {
  return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign;
}

/// X86 has certain instructions which enable interrupts exactly one
/// instruction *after* the instruction which stores to SS.  Return true if the
/// given instruction has such an interrupt delay slot.
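/// For example, pop %ss, a mov to %ss, and sti all create such a slot, so no
/// nop or prefix may be inserted directly after them.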
static bool hasInterruptDelaySlot(const MCInst &Inst) {
  switch (Inst.getOpcode()) {
  case X86::POPSS16:
  case X86::POPSS32:
  case X86::STI:
    return true;

  case X86::MOV16sr:
  case X86::MOV32sr:
  case X86::MOV64sr:
  case X86::MOV16sm:
    if (Inst.getOperand(0).getReg() == X86::SS)
      return true;
    break;
  }
  return false;
}

/// Check if the instruction to be emitted is right after any data.
static bool
isRightAfterData(MCFragment *CurrentFragment,
                 const std::pair<MCFragment *, size_t> &PrevInstPosition) {
  MCFragment *F = CurrentFragment;
  // Empty data fragments may be created to prevent further data being
  // added into the previous fragment; we need to skip them since they
  // have no contents.
  for (; isa_and_nonnull<MCDataFragment>(F); F = F->getPrevNode())
    if (cast<MCDataFragment>(F)->getContents().size() != 0)
      break;

  // Since data is always emitted into a DataFragment, our check strategy is
  // simple here.
  //   - If the fragment is a DataFragment
  //     - If it's not the fragment where the previous instruction is,
  //       returns true.
  //     - If it's the fragment holding the previous instruction but its
  //       size changed since the previous instruction was emitted into
  //       it, returns true.
  //     - Otherwise returns false.
  //   - If the fragment is not a DataFragment, returns false.
  if (auto *DF = dyn_cast_or_null<MCDataFragment>(F))
    return DF != PrevInstPosition.first ||
           DF->getContents().size() != PrevInstPosition.second;

  return false;
}

/// \returns the fragment size if it has instructions, otherwise returns 0.
static size_t getSizeForInstFragment(const MCFragment *F) {
  if (!F || !F->hasInstructions())
    return 0;
  // MCEncodedFragmentWithContents being templated makes this tricky.
  switch (F->getKind()) {
  default:
    llvm_unreachable("Unknown fragment with instructions!");
  case MCFragment::FT_Data:
    return cast<MCDataFragment>(*F).getContents().size();
  case MCFragment::FT_Relaxable:
    return cast<MCRelaxableFragment>(*F).getContents().size();
  case MCFragment::FT_CompactEncodedInst:
    return cast<MCCompactEncodedInstFragment>(*F).getContents().size();
  }
}

/// Return true if we can insert NOP or prefixes automatically before the
/// instruction to be emitted.
bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const {
  if (hasVariantSymbol(Inst))
    // Linker may rewrite the instruction with a variant symbol operand (e.g.
    // TLSCALL).
    return false;

  if (hasInterruptDelaySlot(PrevInst))
    // If this instruction follows an interrupt enabling instruction with a one
    // instruction delay, inserting a nop would change behavior.
    return false;

  if (isPrefix(PrevInst, *MCII))
    // If this instruction follows a prefix, inserting a nop/prefix would
    // change semantics.
    return false;

  if (isPrefix(Inst, *MCII))
    // If this instruction is a prefix, inserting a prefix would change
    // semantics.
    return false;

  if (isRightAfterData(OS.getCurrentFragment(), PrevInstPosition))
    // If this instruction follows any data, there is no clear instruction
    // boundary, and inserting a nop/prefix would change semantics.
    return false;

  return true;
}

bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const {
  if (!OS.getAllowAutoPadding())
    return false;
  assert(allowAutoPadding() && "incorrect initialization!");

  // We only pad in the text section.
  if (!OS.getCurrentSectionOnly()->getKind().isText())
    return false;

  // TODO: Currently we don't deal with bundle cases.
  if (OS.getAssembler().isBundlingEnabled())
    return false;

  // Branches only need to be aligned in 32-bit or 64-bit mode.
  if (!(STI.hasFeature(X86::Is64Bit) || STI.hasFeature(X86::Is32Bit)))
    return false;

  return true;
}

/// Check if the instruction needs to be aligned.
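/// For example, with -x86-align-branch=jcc+ret only conditional branches and
/// returns are candidates for alignment.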
bool X86AsmBackend::needAlign(const MCInst &Inst) const {
  const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
  return (Desc.isConditionalBranch() &&
          (AlignBranchType & X86::AlignBranchJcc)) ||
         (Desc.isUnconditionalBranch() &&
          (AlignBranchType & X86::AlignBranchJmp)) ||
         (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) ||
         (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) ||
         (Desc.isIndirectBranch() &&
          (AlignBranchType & X86::AlignBranchIndirect));
}

/// Insert a BoundaryAlignFragment before instructions to align branches.
void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
                                         const MCInst &Inst, const MCSubtargetInfo &STI) {
  CanPadInst = canPadInst(Inst, OS);

  if (!canPadBranches(OS))
    return;

  if (!isMacroFused(PrevInst, Inst))
    // Macro fusion doesn't actually happen; clear the pending fragment.
    PendingBA = nullptr;

  if (!CanPadInst)
    return;

  if (PendingBA && OS.getCurrentFragment()->getPrevNode() == PendingBA) {
    // Macro fusion actually happens and there is no other fragment inserted
    // after the previous instruction.
    //
    // Do nothing here since we already inserted a BoundaryAlign fragment when
    // we met the first instruction in the fused pair, and we'll tie them
    // together in emitInstructionEnd.
    //
    // Note: When there is at least one fragment, such as MCAlignFragment,
    // inserted after the previous instruction, e.g.
    //
    // \code
    //   cmp %rax, %rcx
    //   .align 16
    //   je .Label0
    // \endcode
    //
    // we will treat the JCC as an unfused branch although it may be fused
    // with the CMP.
    return;
  }

  if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) &&
                          isFirstMacroFusibleInst(Inst, *MCII))) {
    // If we meet an unfused branch or the first instruction in a fusible pair,
    // insert a BoundaryAlign fragment.
    OS.insert(PendingBA = new MCBoundaryAlignFragment(AlignBoundary, STI));
  }
}

/// Set the last fragment to be aligned for the BoundaryAlignFragment.
void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) {
  PrevInst = Inst;
  MCFragment *CF = OS.getCurrentFragment();
  PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF));
  if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF))
    F->setAllowAutoPadding(CanPadInst);

  if (!canPadBranches(OS))
    return;

  if (!needAlign(Inst) || !PendingBA)
    return;

  // Tie the aligned instructions into a pending BoundaryAlign.
  PendingBA->setLastFragment(CF);
  PendingBA = nullptr;

  // We need to ensure that further data isn't added to the current
  // DataFragment, so that we can get the size of instructions later in
  // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty
  // DataFragment.
  if (isa_and_nonnull<MCDataFragment>(CF))
    OS.insert(new MCDataFragment());

  // Update the maximum alignment on the current section if necessary.
  MCSection *Sec = OS.getCurrentSectionOnly();
  Sec->ensureMinAlignment(AlignBoundary);
}

std::optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const {
  if (STI.getTargetTriple().isOSBinFormatELF()) {
    unsigned Type;
    if (STI.getTargetTriple().getArch() == Triple::x86_64) {
      Type = llvm::StringSwitch<unsigned>(Name)
#define ELF_RELOC(X, Y) .Case(#X, Y)
#include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
#undef ELF_RELOC
                 .Case("BFD_RELOC_NONE", ELF::R_X86_64_NONE)
                 .Case("BFD_RELOC_8", ELF::R_X86_64_8)
                 .Case("BFD_RELOC_16", ELF::R_X86_64_16)
                 .Case("BFD_RELOC_32", ELF::R_X86_64_32)
                 .Case("BFD_RELOC_64", ELF::R_X86_64_64)
                 .Default(-1u);
    } else {
      Type = llvm::StringSwitch<unsigned>(Name)
#define ELF_RELOC(X, Y) .Case(#X, Y)
#include "llvm/BinaryFormat/ELFRelocs/i386.def"
#undef ELF_RELOC
                 .Case("BFD_RELOC_NONE", ELF::R_386_NONE)
                 .Case("BFD_RELOC_8", ELF::R_386_8)
                 .Case("BFD_RELOC_16", ELF::R_386_16)
                 .Case("BFD_RELOC_32", ELF::R_386_32)
                 .Default(-1u);
    }
    if (Type == -1u)
      return std::nullopt;
    return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
  }
  return MCAsmBackend::getFixupKind(Name);
}

const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
  const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
      {"reloc_riprel_4byte", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_riprel_4byte_movq_load", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_riprel_4byte_relax", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_riprel_4byte_relax_rex", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_signed_4byte", 0, 32, 0},
      {"reloc_signed_4byte_relax", 0, 32, 0},
      {"reloc_global_offset_table", 0, 32, 0},
      {"reloc_global_offset_table8", 0, 64, 0},
      {"reloc_branch_4byte_pcrel", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
  };

  // Fixup kinds from .reloc directive are like R_386_NONE/R_X86_64_NONE. They
  // do not require any extra processing.
  if (Kind >= FirstLiteralRelocationKind)
    return MCAsmBackend::getFixupKindInfo(FK_NONE);

  if (Kind < FirstTargetFixupKind)
    return MCAsmBackend::getFixupKindInfo(Kind);

  assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
         "Invalid kind!");
  assert(Infos[Kind - FirstTargetFixupKind].Name && "Empty fixup name!");
  return Infos[Kind - FirstTargetFixupKind];
}

bool X86AsmBackend::shouldForceRelocation(const MCAssembler &,
                                          const MCFixup &Fixup,
                                          const MCValue &) {
  return Fixup.getKind() >= FirstLiteralRelocationKind;
}

static unsigned getFixupKindSize(unsigned Kind) {
  switch (Kind) {
  default:
    llvm_unreachable("invalid fixup kind!");
  case FK_NONE:
    return 0;
  case FK_PCRel_1:
  case FK_SecRel_1:
  case FK_Data_1:
    return 1;
  case FK_PCRel_2:
  case FK_SecRel_2:
  case FK_Data_2:
    return 2;
  case FK_PCRel_4:
  case X86::reloc_riprel_4byte:
  case X86::reloc_riprel_4byte_relax:
  case X86::reloc_riprel_4byte_relax_rex:
  case X86::reloc_riprel_4byte_movq_load:
  case X86::reloc_signed_4byte:
  case X86::reloc_signed_4byte_relax:
  case X86::reloc_global_offset_table:
  case X86::reloc_branch_4byte_pcrel:
  case FK_SecRel_4:
  case FK_Data_4:
    return 4;
  case FK_PCRel_8:
  case FK_SecRel_8:
  case FK_Data_8:
  case X86::reloc_global_offset_table8:
    return 8;
  }
}

void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
                               const MCValue &Target,
                               MutableArrayRef<char> Data,
                               uint64_t Value, bool IsResolved,
                               const MCSubtargetInfo *STI) const {
  unsigned Kind = Fixup.getKind();
  if (Kind >= FirstLiteralRelocationKind)
    return;
  unsigned Size = getFixupKindSize(Kind);

  assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");

  int64_t SignedValue = static_cast<int64_t>(Value);
  if ((Target.isAbsolute() || IsResolved) &&
      getFixupKindInfo(Fixup.getKind()).Flags &
      MCFixupKindInfo::FKF_IsPCRel) {
    // Check that the PC-relative fixup fits into the fixup size.
    if (Size > 0 && !isIntN(Size * 8, SignedValue))
      Asm.getContext().reportError(
          Fixup.getLoc(), "value of " + Twine(SignedValue) +
                              " is too large for field of " + Twine(Size) +
                              ((Size == 1) ? " byte." : " bytes."));
  } else {
    // Check that the upper bits are either all zeros or all ones.
    // Specifically ignore overflow/underflow as long as the leakage is
    // limited to the lower bits. This is to remain compatible with
    // other assemblers.
    assert((Size == 0 || isIntN(Size * 8 + 1, SignedValue)) &&
           "Value does not fit in the Fixup field");
  }

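  // Write the fixed-up value out in little-endian byte order.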
  for (unsigned i = 0; i != Size; ++i)
    Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
}

bool X86AsmBackend::mayNeedRelaxation(const MCInst &Inst,
                                      const MCSubtargetInfo &STI) const {
  // Branches can always be relaxed in either mode.
  if (getRelaxedOpcodeBranch(Inst, false) != Inst.getOpcode())
    return true;

  // Check if this instruction is ever relaxable.
  if (getRelaxedOpcodeArith(Inst) == Inst.getOpcode())
    return false;

  // Check if the relaxable operand has an expression. For the current set of
  // relaxable instructions, the relaxable operand is always the last operand.
  unsigned RelaxableOp = Inst.getNumOperands() - 1;
  if (Inst.getOperand(RelaxableOp).isExpr())
    return true;

  return false;
}

bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
                                         uint64_t Value,
                                         const MCRelaxableFragment *DF,
                                         const MCAsmLayout &Layout) const {
  // Relax if the value is too big for a (signed) i8.
  return !isInt<8>(Value);
}

// FIXME: Can tblgen help at all here to verify there aren't other instructions
// we can relax?
void X86AsmBackend::relaxInstruction(MCInst &Inst,
                                     const MCSubtargetInfo &STI) const {
  // The only relaxation X86 does is from a 1-byte pcrel to a 4-byte pcrel.
  bool Is16BitMode = STI.getFeatureBits()[X86::Is16Bit];
  unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode);

  if (RelaxedOp == Inst.getOpcode()) {
    SmallString<256> Tmp;
    raw_svector_ostream OS(Tmp);
    Inst.dump_pretty(OS);
    OS << "\n";
    report_fatal_error("unexpected instruction to relax: " + OS.str());
  }

  Inst.setOpcode(RelaxedOp);
}

/// Return true if this instruction has been fully relaxed into its most
/// general available form.
static bool isFullyRelaxed(const MCRelaxableFragment &RF) {
  auto &Inst = RF.getInst();
  auto &STI = *RF.getSubtargetInfo();
  bool Is16BitMode = STI.getFeatureBits()[X86::Is16Bit];
  return getRelaxedOpcode(Inst, Is16BitMode) == Inst.getOpcode();
}

bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
                                            MCCodeEmitter &Emitter,
                                            unsigned &RemainingSize) const {
  if (!RF.getAllowAutoPadding())
    return false;
  // If the instruction isn't fully relaxed, shifting it around might require a
  // larger value for one of the fixups than can be encoded.  The outer loop
  // will also catch this before moving to the next instruction, but we need to
  // prevent padding this single instruction as well.
  if (!isFullyRelaxed(RF))
    return false;

  const unsigned OldSize = RF.getContents().size();
  if (OldSize == 15)
    return false;

  const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize);
  const unsigned RemainingPrefixSize = [&]() -> unsigned {
    SmallString<15> Code;
    raw_svector_ostream VecOS(Code);
    Emitter.emitPrefix(RF.getInst(), VecOS, STI);
    assert(Code.size() < 15 && "The number of prefixes must be less than 15.");

    // TODO: It turns out we need a decent amount of plumbing for the target
    // specific bits to determine the number of prefixes it is safe to add.
    // Various targets (older chips mostly, but also the Atom family) encounter
    // decoder stalls with too many prefixes.  For testing purposes, we set the
    // value externally for the moment.
    unsigned ExistingPrefixSize = Code.size();
    if (TargetPrefixMax <= ExistingPrefixSize)
      return 0;
    return TargetPrefixMax - ExistingPrefixSize;
  }();
  const unsigned PrefixBytesToAdd =
      std::min(MaxPossiblePad, RemainingPrefixSize);
  if (PrefixBytesToAdd == 0)
    return false;

  const uint8_t Prefix = determinePaddingPrefix(RF.getInst());

  SmallString<256> Code;
  Code.append(PrefixBytesToAdd, Prefix);
  Code.append(RF.getContents().begin(), RF.getContents().end());
  RF.getContents() = Code;

  // Adjust the fixups for the change in offsets.
  for (auto &F : RF.getFixups()) {
    F.setOffset(F.getOffset() + PrefixBytesToAdd);
  }

  RemainingSize -= PrefixBytesToAdd;
  return true;
}

bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
                                                MCCodeEmitter &Emitter,
                                                unsigned &RemainingSize) const {
  if (isFullyRelaxed(RF))
    // TODO: There are lots of other tricks we could apply for increasing
    // encoding size without impacting performance.
    return false;

  MCInst Relaxed = RF.getInst();
  relaxInstruction(Relaxed, *RF.getSubtargetInfo());

  SmallVector<MCFixup, 4> Fixups;
  SmallString<15> Code;
  raw_svector_ostream VecOS(Code);
  Emitter.encodeInstruction(Relaxed, VecOS, Fixups, *RF.getSubtargetInfo());
  const unsigned OldSize = RF.getContents().size();
  const unsigned NewSize = Code.size();
  assert(NewSize >= OldSize && "size decrease during relaxation?");
  unsigned Delta = NewSize - OldSize;
  if (Delta > RemainingSize)
    return false;
  RF.setInst(Relaxed);
  RF.getContents() = Code;
  RF.getFixups() = Fixups;
  RemainingSize -= Delta;
  return true;
}

bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
                                           MCCodeEmitter &Emitter,
                                           unsigned &RemainingSize) const {
  bool Changed = false;
  if (RemainingSize != 0)
    Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize);
  if (RemainingSize != 0)
    Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize);
  return Changed;
}

void X86AsmBackend::finishLayout(MCAssembler const &Asm,
                                 MCAsmLayout &Layout) const {
  // See if we can further relax some instructions to cut down on the number of
  // nop bytes required for code alignment.  The actual win is in reducing
  // instruction count, not number of bytes.  Modern X86-64 can easily end up
  // decode limited.  It is often better to reduce the number of instructions
  // (i.e. eliminate nops) even at the cost of increasing the size and
  // complexity of others.
  if (!X86PadForAlign && !X86PadForBranchAlign)
    return;

  // The processed regions are delimited by LabeledFragments. -g may have more
  // MCSymbols and therefore different relaxation results. X86PadForAlign is
  // disabled by default to eliminate the -g vs non -g difference.
  DenseSet<MCFragment *> LabeledFragments;
  for (const MCSymbol &S : Asm.symbols())
    LabeledFragments.insert(S.getFragment(false));

  for (MCSection &Sec : Asm) {
    if (!Sec.getKind().isText())
      continue;

    SmallVector<MCRelaxableFragment *, 4> Relaxable;
    for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) {
      MCFragment &F = *I;

      if (LabeledFragments.count(&F))
        Relaxable.clear();

      if (F.getKind() == MCFragment::FT_Data ||
          F.getKind() == MCFragment::FT_CompactEncodedInst)
        // Skip and ignore
        continue;

      if (F.getKind() == MCFragment::FT_Relaxable) {
        auto &RF = cast<MCRelaxableFragment>(*I);
        Relaxable.push_back(&RF);
        continue;
      }

      auto canHandle = [](MCFragment &F) -> bool {
        switch (F.getKind()) {
        default:
          return false;
        case MCFragment::FT_Align:
          return X86PadForAlign;
        case MCFragment::FT_BoundaryAlign:
          return X86PadForBranchAlign;
        }
      };
      // For any unhandled kind, assume we can't change the layout.
      if (!canHandle(F)) {
        Relaxable.clear();
        continue;
      }

#ifndef NDEBUG
      const uint64_t OrigOffset = Layout.getFragmentOffset(&F);
#endif
      const uint64_t OrigSize = Asm.computeFragmentSize(Layout, F);

      // To keep the effects local, prefer to relax instructions closest to
      // the align directive.  This is purely about human understandability
      // of the resulting code.  If we later find a reason to expand
      // particular instructions over others, we can adjust.
      MCFragment *FirstChangedFragment = nullptr;
      unsigned RemainingSize = OrigSize;
      while (!Relaxable.empty() && RemainingSize != 0) {
        auto &RF = *Relaxable.pop_back_val();
        // Give the backend a chance to play any tricks it wishes to increase
        // the encoding size of the given instruction.  Target independent code
        // will try further relaxation, but targets may play further tricks.
        if (padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize))
          FirstChangedFragment = &RF;

        // If we have an instruction which hasn't been fully relaxed, we can't
        // skip past it and insert bytes before it.  Changing its starting
        // offset might require a larger negative offset than it can encode.
        // We don't need to worry about larger positive offsets as none of the
        // possible offsets between this and our align are visible, and the
        // ones afterwards aren't changing.
        if (!isFullyRelaxed(RF))
          break;
      }
      Relaxable.clear();

      if (FirstChangedFragment) {
        // Make sure the offsets for any fragments in the affected range get
        // updated.  Note that this (conservatively) invalidates the offsets of
        // those following, but this is not required.
        Layout.invalidateFragmentsFrom(FirstChangedFragment);
      }

      // BoundaryAlign explicitly tracks its size (unlike align).
      if (F.getKind() == MCFragment::FT_BoundaryAlign)
        cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize);

#ifndef NDEBUG
      const uint64_t FinalOffset = Layout.getFragmentOffset(&F);
      const uint64_t FinalSize = Asm.computeFragmentSize(Layout, F);
      assert(OrigOffset + OrigSize == FinalOffset + FinalSize &&
             "can't move start of next fragment!");
      assert(FinalSize == RemainingSize && "inconsistent size computation?");
#endif

      // If we're looking at a boundary align, make sure we don't try to pad
      // its target instructions for some following directive.  Doing so would
      // break the alignment of the current boundary align.
      if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(&F)) {
        const MCFragment *LastFragment = BF->getLastFragment();
        if (!LastFragment)
          continue;
        while (&*I != LastFragment)
          ++I;
      }
    }
  }

  // The layout is done. Mark every fragment as valid.
  for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
    MCSection &Section = *Layout.getSectionOrder()[i];
    Layout.getFragmentOffset(&*Section.getFragmentList().rbegin());
    Asm.computeFragmentSize(Layout, *Section.getFragmentList().rbegin());
  }
}

unsigned X86AsmBackend::getMaximumNopSize(const MCSubtargetInfo &STI) const {
  if (STI.hasFeature(X86::Is16Bit))
    return 4;
  if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Is64Bit))
    return 1;
  if (STI.getFeatureBits()[X86::TuningFast7ByteNOP])
    return 7;
  if (STI.getFeatureBits()[X86::TuningFast15ByteNOP])
    return 15;
  if (STI.getFeatureBits()[X86::TuningFast11ByteNOP])
    return 11;
  // FIXME: handle 32-bit mode
  // 15 bytes is the longest single NOP instruction, but 10 bytes is
  // commonly the longest that can be efficiently decoded.
  return 10;
}

/// Write a sequence of optimal nops to the output, covering \p Count
/// bytes.
/// \return - true on success, false on failure
bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
                                 const MCSubtargetInfo *STI) const {

  // Write 1 or 2 byte NOP sequences, or a longer trapsled, until
  // we have written Count bytes.
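  // For example, Count == 5 emits 0xEB 0x03 0xCC 0xCC 0xCC: a two-byte short
  // jump over three int3 bytes.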
  do {
    const uint8_t ThisNopLength = (uint8_t) std::min(Count, (uint64_t)127);
    switch (ThisNopLength) {
      case 0: break;
      case 1: OS << '\x90';
              break;
      case 2: OS << '\x66';
              OS << '\x90';
              break;
      default: OS << '\xEB';
               OS << (uint8_t)(ThisNopLength - 2);
               for (uint8_t i = 2; i < ThisNopLength; ++i)
                 OS << '\xCC';
    }
    Count -= ThisNopLength;
  } while (Count != 0);

  return true;
}

/* *** */


namespace {

class ELFX86AsmBackend : public X86AsmBackend {
public:
  uint8_t OSABI;
  ELFX86AsmBackend(const Target &T, uint8_t OSABI, const MCSubtargetInfo &STI)
      : X86AsmBackend(T, STI), OSABI(OSABI) {}
};

class ELFX86_32AsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_32AsmBackend(const Target &T, uint8_t OSABI,
                      const MCSubtargetInfo &STI)
    : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, ELF::EM_386);
  }
};

class ELFX86_X32AsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI,
                       const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
                                    ELF::EM_X86_64);
  }
};

class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI,
                         const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
                                    ELF::EM_IAMCU);
  }
};

class ELFX86_64AsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_64AsmBackend(const Target &T, uint8_t OSABI,
                      const MCSubtargetInfo &STI)
    : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ true, OSABI, ELF::EM_X86_64);
  }
};

class WindowsX86AsmBackend : public X86AsmBackend {
  bool Is64Bit;

public:
  WindowsX86AsmBackend(const Target &T, bool is64Bit,
                       const MCSubtargetInfo &STI)
    : X86AsmBackend(T, STI)
    , Is64Bit(is64Bit) {
  }

  std::optional<MCFixupKind> getFixupKind(StringRef Name) const override {
    return StringSwitch<std::optional<MCFixupKind>>(Name)
        .Case("dir32", FK_Data_4)
        .Case("secrel32", FK_SecRel_4)
        .Case("secidx", FK_SecRel_2)
        .Default(MCAsmBackend::getFixupKind(Name));
  }

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86WinCOFFObjectWriter(Is64Bit);
  }
};

namespace CU {

  /// Compact unwind encoding values.
  enum CompactUnwindEncodings {
    /// [RE]BP based frame where [RE]BP is pushed on the stack immediately after
    /// the return address, then [RE]SP is moved to [RE]BP.
    UNWIND_MODE_BP_FRAME                   = 0x01000000,

    /// A frameless function with a small constant stack size.
    UNWIND_MODE_STACK_IMMD                 = 0x02000000,

    /// A frameless function with a large constant stack size.
    UNWIND_MODE_STACK_IND                  = 0x03000000,

    /// No compact unwind encoding is available.
    UNWIND_MODE_DWARF                      = 0x04000000,

    /// Mask for encoding the frame registers.
    UNWIND_BP_FRAME_REGISTERS              = 0x00007FFF,

    /// Mask for encoding the frameless registers.
    UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
  };

} // namespace CU

class DarwinX86AsmBackend : public X86AsmBackend {
  const MCRegisterInfo &MRI;

  /// Number of registers that can be saved in a compact unwind encoding.
  enum { CU_NUM_SAVED_REGS = 6 };

  mutable unsigned SavedRegs[CU_NUM_SAVED_REGS];
  Triple TT;
  bool Is64Bit;

  unsigned OffsetSize;                   ///< Offset of a "push" instruction.
  unsigned MoveInstrSize;                ///< Size of a "move" instruction.
  unsigned StackDivide;                  ///< Amount to adjust stack size by.
protected:
  /// Size of a "push" instruction for the given register.
  unsigned PushInstrSize(unsigned Reg) const {
    switch (Reg) {
      case X86::EBX:
      case X86::ECX:
      case X86::EDX:
      case X86::EDI:
      case X86::ESI:
      case X86::EBP:
      case X86::RBX:
      case X86::RBP:
        return 1;
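      // Pushes of %r12-%r15 need a REX.B prefix and are therefore two bytes.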
      case X86::R12:
      case X86::R13:
      case X86::R14:
      case X86::R15:
        return 2;
    }
    return 1;
  }

private:
  /// Get the compact unwind number for a given register. The number
  /// corresponds to the enum lists in compact_unwind_encoding.h.
  int getCompactUnwindRegNum(unsigned Reg) const {
    static const MCPhysReg CU32BitRegs[7] = {
      X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
    };
    static const MCPhysReg CU64BitRegs[] = {
      X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
    };
    const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
    for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
      if (*CURegs == Reg)
        return Idx;

    return -1;
  }

  /// Return the registers encoded for a compact encoding with a frame
  /// pointer.
  uint32_t encodeCompactUnwindRegistersWithFrame() const {
    // Encode the registers in the order they were saved --- 3-bits per
    // register. The list of saved registers is assumed to be in reverse
    // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS.
    uint32_t RegEnc = 0;
    for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) {
      unsigned Reg = SavedRegs[i];
      if (Reg == 0) break;

      int CURegNum = getCompactUnwindRegNum(Reg);
      if (CURegNum == -1) return ~0U;

      // Encode the 3-bit register number in order, skipping over 3-bits for
      // each register.
      RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
    }

    assert((RegEnc & 0x3FFFF) == RegEnc &&
           "Invalid compact register encoding!");
    return RegEnc;
  }

  /// Create the permutation encoding used with frameless stacks. It is
  /// passed the number of registers to be saved and an array of the registers
  /// saved.
  uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const {
    // The saved registers are numbered from 1 to 6. In order to encode the
    // order in which they were saved, we re-number them according to their
    // place in the register order. The re-numbering is relative to the last
    // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in
    // that order:
    //
    //    Orig  Re-Num
    //    ----  ------
    //     6       6
    //     2       2
    //     4       3
    //     5       3
    //
    for (unsigned i = 0; i < RegCount; ++i) {
      int CUReg = getCompactUnwindRegNum(SavedRegs[i]);
      if (CUReg == -1) return ~0U;
      SavedRegs[i] = CUReg;
    }

    // Reverse the list.
    std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]);

    uint32_t RenumRegs[CU_NUM_SAVED_REGS];
    for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i) {
      unsigned Countless = 0;
      for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
        if (SavedRegs[j] < SavedRegs[i])
          ++Countless;

      RenumRegs[i] = SavedRegs[i] - Countless - 1;
    }

    // Take the renumbered values and encode them into a 10-bit number.
    uint32_t permutationEncoding = 0;
    switch (RegCount) {
    case 6:
      permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
                             + 6 * RenumRegs[2] +  2 * RenumRegs[3]
                             +     RenumRegs[4];
      break;
    case 5:
      permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
                             + 6 * RenumRegs[3] +  2 * RenumRegs[4]
                             +     RenumRegs[5];
      break;
    case 4:
      permutationEncoding |=  60 * RenumRegs[2] + 12 * RenumRegs[3]
                             + 3 * RenumRegs[4] +      RenumRegs[5];
      break;
    case 3:
      permutationEncoding |=  20 * RenumRegs[3] +  4 * RenumRegs[4]
                             +     RenumRegs[5];
      break;
    case 2:
      permutationEncoding |=   5 * RenumRegs[4] +      RenumRegs[5];
      break;
    case 1:
      permutationEncoding |=       RenumRegs[5];
      break;
    }

    assert((permutationEncoding & 0x3FF) == permutationEncoding &&
           "Invalid compact register encoding!");
    return permutationEncoding;
  }

1302 public:
DarwinX86AsmBackend(const Target & T,const MCRegisterInfo & MRI,const MCSubtargetInfo & STI)1303   DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI,
1304                       const MCSubtargetInfo &STI)
1305       : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()),
1306         Is64Bit(TT.isArch64Bit()) {
    memset(SavedRegs, 0, sizeof(SavedRegs));
    OffsetSize = Is64Bit ? 8 : 4;
    MoveInstrSize = Is64Bit ? 3 : 2;
    StackDivide = Is64Bit ? 8 : 4;
  }

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    uint32_t CPUType = cantFail(MachO::getCPUType(TT));
    uint32_t CPUSubType = cantFail(MachO::getCPUSubType(TT));
    return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubType);
  }

  /// Implementation of algorithm to generate the compact unwind encoding
  /// for the CFI instructions.
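  /// The result is one of the CU::UNWIND_MODE_* values: BP_FRAME when a
  /// frame pointer is established, STACK_IMMD or STACK_IND for frameless
  /// functions, or UNWIND_MODE_DWARF when the prologue cannot be described
  /// compactly and the unwinder must fall back to the DWARF CFI.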
  uint32_t
  generateCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs) const override {
    if (Instrs.empty()) return 0;

    // Reset the saved registers.
    unsigned SavedRegIdx = 0;
    memset(SavedRegs, 0, sizeof(SavedRegs));

    bool HasFP = false;

    // The compact unwind encoding built up from the CFI instructions below.
    uint32_t CompactUnwindEncoding = 0;

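    // SubtractInstrIdx starts as the offset of the $nnnnnn immediate within
    // the "sub $nnnnnn, %rsp/%esp" instruction (3 bytes of prefix/opcode/ModRM
    // in 64-bit mode, 2 in 32-bit mode); the size of the preceding prologue
    // instructions is added later for the STACK_IND mode.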
    unsigned SubtractInstrIdx = Is64Bit ? 3 : 2;
    unsigned InstrOffset = 0;
    unsigned StackAdjust = 0;
    unsigned StackSize = 0;
    int MinAbsOffset = std::numeric_limits<int>::max();

    for (const MCCFIInstruction &Inst : Instrs) {
      switch (Inst.getOperation()) {
      default:
        // Any other CFI directives indicate a frame that we aren't prepared
        // to represent via compact unwind, so just bail out.
        return CU::UNWIND_MODE_DWARF;
      case MCCFIInstruction::OpDefCfaRegister: {
        // Defines a frame pointer. E.g.
        //
        //     movq %rsp, %rbp
        //  L0:
        //     .cfi_def_cfa_register %rbp
        //
        HasFP = true;

        // If the frame pointer is other than ebp/rbp, we do not have a way to
        // generate a compact unwinding representation, so bail out.
        if (*MRI.getLLVMRegNum(Inst.getRegister(), true) !=
            (Is64Bit ? X86::RBP : X86::EBP))
          return CU::UNWIND_MODE_DWARF;

        // Reset the counts.
        memset(SavedRegs, 0, sizeof(SavedRegs));
        StackAdjust = 0;
        SavedRegIdx = 0;
        MinAbsOffset = std::numeric_limits<int>::max();
        InstrOffset += MoveInstrSize;
        break;
      }
      case MCCFIInstruction::OpDefCfaOffset: {
        // Defines a new offset for the CFA. E.g.
        //
        //  With frame:
        //
        //     pushq %rbp
        //  L0:
        //     .cfi_def_cfa_offset 16
        //
        //  Without frame:
        //
        //     subq $72, %rsp
        //  L0:
        //     .cfi_def_cfa_offset 80
        //
        StackSize = Inst.getOffset() / StackDivide;
        break;
      }
      case MCCFIInstruction::OpOffset: {
        // Defines a "push" of a callee-saved register. E.g.
        //
        //     pushq %r15
        //     pushq %r14
        //     pushq %rbx
        //  L0:
        //     subq $120, %rsp
        //  L1:
        //     .cfi_offset %rbx, -40
        //     .cfi_offset %r14, -32
        //     .cfi_offset %r15, -24
        //
        if (SavedRegIdx == CU_NUM_SAVED_REGS)
          // If there are too many saved registers, we cannot use a compact
          // unwind encoding.
          return CU::UNWIND_MODE_DWARF;

        unsigned Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true);
        SavedRegs[SavedRegIdx++] = Reg;
        StackAdjust += OffsetSize;
        MinAbsOffset = std::min(MinAbsOffset, abs(Inst.getOffset()));
        InstrOffset += PushInstrSize(Reg);
        break;
      }
      }
    }

    StackAdjust /= StackDivide;

    if (HasFP) {
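      // With a frame pointer the encoding packs the BP_FRAME mode bits,
      // StackAdjust (effectively the number of pushed callee-saved registers,
      // since OffsetSize equals StackDivide) into bits 16-23, and the
      // saved-register list from encodeCompactUnwindRegistersWithFrame() into
      // the bits selected by UNWIND_BP_FRAME_REGISTERS.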
      if ((StackAdjust & 0xFF) != StackAdjust)
        // Offset was too big for a compact unwind encoding.
        return CU::UNWIND_MODE_DWARF;

      // We don't attempt to track a real StackAdjust, so if the saved registers
      // aren't adjacent to rbp we can't cope.
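      // (With the return address and the saved frame pointer on the stack, a
      // register pushed immediately after the frame pointer has a CFI offset
      // of exactly 3 * OffsetSize from the CFA, which is what we require of
      // the closest saved register here.)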
      if (SavedRegIdx != 0 && MinAbsOffset != 3 * (int)OffsetSize)
        return CU::UNWIND_MODE_DWARF;

      // Get the encoding of the saved registers when we have a frame pointer.
      uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame();
      if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;

      CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME;
      CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
      CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
    } else {
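      // Frameless layout assembled below: the mode bits, then either the
      // stack size (STACK_IMMD) or the offset of the sub instruction's
      // immediate (STACK_IND) in bits 16-23, any extra push-based stack
      // adjustment in bits 13-15, the saved-register count in bits 10-12,
      // and the 10-bit register permutation in the low bits.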
      SubtractInstrIdx += InstrOffset;
      ++StackAdjust;

      if ((StackSize & 0xFF) == StackSize) {
        // Frameless stack with a small stack size.
        CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD;

        // Encode the stack size.
        CompactUnwindEncoding |= (StackSize & 0xFF) << 16;
      } else {
        if ((StackAdjust & 0x7) != StackAdjust)
          // The extra stack adjustments are too big for us to handle.
          return CU::UNWIND_MODE_DWARF;

        // Frameless stack with an offset too large for us to encode compactly.
        CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND;

        // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
        // instruction.
        CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;

        // Encode any extra stack adjustments (done via push instructions).
        CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
      }

      // Encode the number of registers saved. (Reverse the list first.)
      std::reverse(&SavedRegs[0], &SavedRegs[SavedRegIdx]);
      CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;

      // Get the encoding of the saved registers when we don't have a frame
      // pointer.
      uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegIdx);
      if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;

      // Encode the register encoding.
      CompactUnwindEncoding |=
        RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION;
    }

    return CompactUnwindEncoding;
  }
};

} // end anonymous namespace

MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
                                           const MCSubtargetInfo &STI,
                                           const MCRegisterInfo &MRI,
                                           const MCTargetOptions &Options) {
  const Triple &TheTriple = STI.getTargetTriple();
  if (TheTriple.isOSBinFormatMachO())
    return new DarwinX86AsmBackend(T, MRI, STI);

  if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
    return new WindowsX86AsmBackend(T, false, STI);

  uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());

  if (TheTriple.isOSIAMCU())
    return new ELFX86_IAMCUAsmBackend(T, OSABI, STI);

  return new ELFX86_32AsmBackend(T, OSABI, STI);
}

MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
                                           const MCSubtargetInfo &STI,
                                           const MCRegisterInfo &MRI,
                                           const MCTargetOptions &Options) {
  const Triple &TheTriple = STI.getTargetTriple();
  if (TheTriple.isOSBinFormatMachO())
    return new DarwinX86AsmBackend(T, MRI, STI);

  if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
    return new WindowsX86AsmBackend(T, true, STI);

  uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());

  if (TheTriple.isX32())
    return new ELFX86_X32AsmBackend(T, OSABI, STI);
  return new ELFX86_64AsmBackend(T, OSABI, STI);
}