1 //===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "MCTargetDesc/X86BaseInfo.h"
10 #include "MCTargetDesc/X86FixupKinds.h"
11 #include "MCTargetDesc/X86InstrRelaxTables.h"
12 #include "llvm/ADT/StringSwitch.h"
13 #include "llvm/BinaryFormat/ELF.h"
14 #include "llvm/BinaryFormat/MachO.h"
15 #include "llvm/MC/MCAsmBackend.h"
16 #include "llvm/MC/MCAsmLayout.h"
17 #include "llvm/MC/MCAssembler.h"
18 #include "llvm/MC/MCCodeEmitter.h"
19 #include "llvm/MC/MCContext.h"
20 #include "llvm/MC/MCDwarf.h"
21 #include "llvm/MC/MCELFObjectWriter.h"
22 #include "llvm/MC/MCExpr.h"
23 #include "llvm/MC/MCFixupKindInfo.h"
24 #include "llvm/MC/MCInst.h"
25 #include "llvm/MC/MCInstrInfo.h"
26 #include "llvm/MC/MCMachObjectWriter.h"
27 #include "llvm/MC/MCObjectStreamer.h"
28 #include "llvm/MC/MCObjectWriter.h"
29 #include "llvm/MC/MCRegisterInfo.h"
30 #include "llvm/MC/MCSectionMachO.h"
31 #include "llvm/MC/MCSubtargetInfo.h"
32 #include "llvm/MC/MCValue.h"
33 #include "llvm/MC/TargetRegistry.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/ErrorHandling.h"
36 #include "llvm/Support/raw_ostream.h"
37
38 using namespace llvm;
39
40 namespace {
41 /// A wrapper for holding a mask of the values from X86::AlignBranchBoundaryKind
42 class X86AlignBranchKind {
43 private:
44 uint8_t AlignBranchKind = 0;
45
46 public:
operator =(const std::string & Val)47 void operator=(const std::string &Val) {
48 if (Val.empty())
49 return;
50 SmallVector<StringRef, 6> BranchTypes;
51 StringRef(Val).split(BranchTypes, '+', -1, false);
52 for (auto BranchType : BranchTypes) {
53 if (BranchType == "fused")
54 addKind(X86::AlignBranchFused);
55 else if (BranchType == "jcc")
56 addKind(X86::AlignBranchJcc);
57 else if (BranchType == "jmp")
58 addKind(X86::AlignBranchJmp);
59 else if (BranchType == "call")
60 addKind(X86::AlignBranchCall);
61 else if (BranchType == "ret")
62 addKind(X86::AlignBranchRet);
63 else if (BranchType == "indirect")
64 addKind(X86::AlignBranchIndirect);
65 else {
66 errs() << "invalid argument " << BranchType.str()
67 << " to -x86-align-branch=; each element must be one of: fused, "
68 "jcc, jmp, call, ret, indirect.(plus separated)\n";
69 }
70 }
71 }
72
operator uint8_t() const73 operator uint8_t() const { return AlignBranchKind; }
addKind(X86::AlignBranchBoundaryKind Value)74 void addKind(X86::AlignBranchBoundaryKind Value) { AlignBranchKind |= Value; }
75 };
76
77 X86AlignBranchKind X86AlignBranchKindLoc;
78
79 cl::opt<unsigned> X86AlignBranchBoundary(
80 "x86-align-branch-boundary", cl::init(0),
81 cl::desc(
82 "Control how the assembler should align branches with NOP. If the "
83 "boundary's size is not 0, it should be a power of 2 and no less "
84 "than 32. Branches will be aligned to prevent from being across or "
85 "against the boundary of specified size. The default value 0 does not "
86 "align branches."));
87
88 cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch(
89 "x86-align-branch",
90 cl::desc(
91 "Specify types of branches to align (plus separated list of types):"
92 "\njcc indicates conditional jumps"
93 "\nfused indicates fused conditional jumps"
94 "\njmp indicates direct unconditional jumps"
95 "\ncall indicates direct and indirect calls"
96 "\nret indicates rets"
97 "\nindirect indicates indirect unconditional jumps"),
98 cl::location(X86AlignBranchKindLoc));
99
100 cl::opt<bool> X86AlignBranchWithin32BBoundaries(
101 "x86-branches-within-32B-boundaries", cl::init(false),
102 cl::desc(
103 "Align selected instructions to mitigate negative performance impact "
104 "of Intel's micro code update for errata skx102. May break "
105 "assumptions about labels corresponding to particular instructions, "
106 "and should be used with caution."));
107
108 cl::opt<unsigned> X86PadMaxPrefixSize(
109 "x86-pad-max-prefix-size", cl::init(0),
110 cl::desc("Maximum number of prefixes to use for padding"));
111
112 cl::opt<bool> X86PadForAlign(
113 "x86-pad-for-align", cl::init(false), cl::Hidden,
114 cl::desc("Pad previous instructions to implement align directives"));
115
116 cl::opt<bool> X86PadForBranchAlign(
117 "x86-pad-for-branch-align", cl::init(true), cl::Hidden,
118 cl::desc("Pad previous instructions to implement branch alignment"));
119
120 class X86AsmBackend : public MCAsmBackend {
121 const MCSubtargetInfo &STI;
122 std::unique_ptr<const MCInstrInfo> MCII;
123 X86AlignBranchKind AlignBranchType;
124 Align AlignBoundary;
125 unsigned TargetPrefixMax = 0;
126
127 MCInst PrevInst;
128 MCBoundaryAlignFragment *PendingBA = nullptr;
129 std::pair<MCFragment *, size_t> PrevInstPosition;
130 bool CanPadInst;
131
132 uint8_t determinePaddingPrefix(const MCInst &Inst) const;
133 bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
134 bool needAlign(const MCInst &Inst) const;
135 bool canPadBranches(MCObjectStreamer &OS) const;
136 bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const;
137
138 public:
X86AsmBackend(const Target & T,const MCSubtargetInfo & STI)139 X86AsmBackend(const Target &T, const MCSubtargetInfo &STI)
140 : MCAsmBackend(support::little), STI(STI),
141 MCII(T.createMCInstrInfo()) {
142 if (X86AlignBranchWithin32BBoundaries) {
143 // At the moment, this defaults to aligning fused branches, unconditional
144 // jumps, and (unfused) conditional jumps with nops. Both the
145 // instructions aligned and the alignment method (nop vs prefix) may
146 // change in the future.
147 AlignBoundary = assumeAligned(32);;
148 AlignBranchType.addKind(X86::AlignBranchFused);
149 AlignBranchType.addKind(X86::AlignBranchJcc);
150 AlignBranchType.addKind(X86::AlignBranchJmp);
151 }
152 // Allow overriding defaults set by main flag
153 if (X86AlignBranchBoundary.getNumOccurrences())
154 AlignBoundary = assumeAligned(X86AlignBranchBoundary);
155 if (X86AlignBranch.getNumOccurrences())
156 AlignBranchType = X86AlignBranchKindLoc;
157 if (X86PadMaxPrefixSize.getNumOccurrences())
158 TargetPrefixMax = X86PadMaxPrefixSize;
159 }
160
161 bool allowAutoPadding() const override;
162 bool allowEnhancedRelaxation() const override;
163 void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst,
164 const MCSubtargetInfo &STI) override;
165 void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) override;
166
getNumFixupKinds() const167 unsigned getNumFixupKinds() const override {
168 return X86::NumTargetFixupKinds;
169 }
170
171 std::optional<MCFixupKind> getFixupKind(StringRef Name) const override;
172
173 const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
174
175 bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
176 const MCValue &Target) override;
177
178 void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
179 const MCValue &Target, MutableArrayRef<char> Data,
180 uint64_t Value, bool IsResolved,
181 const MCSubtargetInfo *STI) const override;
182
183 bool mayNeedRelaxation(const MCInst &Inst,
184 const MCSubtargetInfo &STI) const override;
185
186 bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
187 const MCRelaxableFragment *DF,
188 const MCAsmLayout &Layout) const override;
189
190 void relaxInstruction(MCInst &Inst,
191 const MCSubtargetInfo &STI) const override;
192
193 bool padInstructionViaRelaxation(MCRelaxableFragment &RF,
194 MCCodeEmitter &Emitter,
195 unsigned &RemainingSize) const;
196
197 bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
198 unsigned &RemainingSize) const;
199
200 bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
201 unsigned &RemainingSize) const;
202
203 void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const override;
204
205 unsigned getMaximumNopSize(const MCSubtargetInfo &STI) const override;
206
207 bool writeNopData(raw_ostream &OS, uint64_t Count,
208 const MCSubtargetInfo *STI) const override;
209 };
210 } // end anonymous namespace
211
getRelaxedOpcodeBranch(const MCInst & Inst,bool Is16BitMode)212 static unsigned getRelaxedOpcodeBranch(const MCInst &Inst, bool Is16BitMode) {
213 unsigned Op = Inst.getOpcode();
214 switch (Op) {
215 default:
216 return Op;
217 case X86::JCC_1:
218 return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4;
219 case X86::JMP_1:
220 return (Is16BitMode) ? X86::JMP_2 : X86::JMP_4;
221 }
222 }
223
getRelaxedOpcodeArith(const MCInst & Inst)224 static unsigned getRelaxedOpcodeArith(const MCInst &Inst) {
225 unsigned Op = Inst.getOpcode();
226 return X86::getRelaxedOpcodeArith(Op);
227 }
228
getRelaxedOpcode(const MCInst & Inst,bool Is16BitMode)229 static unsigned getRelaxedOpcode(const MCInst &Inst, bool Is16BitMode) {
230 unsigned R = getRelaxedOpcodeArith(Inst);
231 if (R != Inst.getOpcode())
232 return R;
233 return getRelaxedOpcodeBranch(Inst, Is16BitMode);
234 }
235
getCondFromBranch(const MCInst & MI,const MCInstrInfo & MCII)236 static X86::CondCode getCondFromBranch(const MCInst &MI,
237 const MCInstrInfo &MCII) {
238 unsigned Opcode = MI.getOpcode();
239 switch (Opcode) {
240 default:
241 return X86::COND_INVALID;
242 case X86::JCC_1: {
243 const MCInstrDesc &Desc = MCII.get(Opcode);
244 return static_cast<X86::CondCode>(
245 MI.getOperand(Desc.getNumOperands() - 1).getImm());
246 }
247 }
248 }
249
250 static X86::SecondMacroFusionInstKind
classifySecondInstInMacroFusion(const MCInst & MI,const MCInstrInfo & MCII)251 classifySecondInstInMacroFusion(const MCInst &MI, const MCInstrInfo &MCII) {
252 X86::CondCode CC = getCondFromBranch(MI, MCII);
253 return classifySecondCondCodeInMacroFusion(CC);
254 }
255
256 /// Check if the instruction uses RIP relative addressing.
isRIPRelative(const MCInst & MI,const MCInstrInfo & MCII)257 static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) {
258 unsigned Opcode = MI.getOpcode();
259 const MCInstrDesc &Desc = MCII.get(Opcode);
260 uint64_t TSFlags = Desc.TSFlags;
261 unsigned CurOp = X86II::getOperandBias(Desc);
262 int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
263 if (MemoryOperand < 0)
264 return false;
265 unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg;
266 unsigned BaseReg = MI.getOperand(BaseRegNum).getReg();
267 return (BaseReg == X86::RIP);
268 }
269
270 /// Check if the instruction is a prefix.
isPrefix(const MCInst & MI,const MCInstrInfo & MCII)271 static bool isPrefix(const MCInst &MI, const MCInstrInfo &MCII) {
272 return X86II::isPrefix(MCII.get(MI.getOpcode()).TSFlags);
273 }
274
275 /// Check if the instruction is valid as the first instruction in macro fusion.
isFirstMacroFusibleInst(const MCInst & Inst,const MCInstrInfo & MCII)276 static bool isFirstMacroFusibleInst(const MCInst &Inst,
277 const MCInstrInfo &MCII) {
278 // An Intel instruction with RIP relative addressing is not macro fusible.
279 if (isRIPRelative(Inst, MCII))
280 return false;
281 X86::FirstMacroFusionInstKind FIK =
282 X86::classifyFirstOpcodeInMacroFusion(Inst.getOpcode());
283 return FIK != X86::FirstMacroFusionInstKind::Invalid;
284 }
285
286 /// X86 can reduce the bytes of NOP by padding instructions with prefixes to
287 /// get a better peformance in some cases. Here, we determine which prefix is
288 /// the most suitable.
289 ///
290 /// If the instruction has a segment override prefix, use the existing one.
291 /// If the target is 64-bit, use the CS.
292 /// If the target is 32-bit,
293 /// - If the instruction has a ESP/EBP base register, use SS.
294 /// - Otherwise use DS.
determinePaddingPrefix(const MCInst & Inst) const295 uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const {
296 assert((STI.hasFeature(X86::Is32Bit) || STI.hasFeature(X86::Is64Bit)) &&
297 "Prefixes can be added only in 32-bit or 64-bit mode.");
298 const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
299 uint64_t TSFlags = Desc.TSFlags;
300
301 // Determine where the memory operand starts, if present.
302 int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
303 if (MemoryOperand != -1)
304 MemoryOperand += X86II::getOperandBias(Desc);
305
306 unsigned SegmentReg = 0;
307 if (MemoryOperand >= 0) {
308 // Check for explicit segment override on memory operand.
309 SegmentReg = Inst.getOperand(MemoryOperand + X86::AddrSegmentReg).getReg();
310 }
311
312 switch (TSFlags & X86II::FormMask) {
313 default:
314 break;
315 case X86II::RawFrmDstSrc: {
316 // Check segment override opcode prefix as needed (not for %ds).
317 if (Inst.getOperand(2).getReg() != X86::DS)
318 SegmentReg = Inst.getOperand(2).getReg();
319 break;
320 }
321 case X86II::RawFrmSrc: {
322 // Check segment override opcode prefix as needed (not for %ds).
323 if (Inst.getOperand(1).getReg() != X86::DS)
324 SegmentReg = Inst.getOperand(1).getReg();
325 break;
326 }
327 case X86II::RawFrmMemOffs: {
328 // Check segment override opcode prefix as needed.
329 SegmentReg = Inst.getOperand(1).getReg();
330 break;
331 }
332 }
333
334 if (SegmentReg != 0)
335 return X86::getSegmentOverridePrefixForReg(SegmentReg);
336
337 if (STI.hasFeature(X86::Is64Bit))
338 return X86::CS_Encoding;
339
340 if (MemoryOperand >= 0) {
341 unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg;
342 unsigned BaseReg = Inst.getOperand(BaseRegNum).getReg();
343 if (BaseReg == X86::ESP || BaseReg == X86::EBP)
344 return X86::SS_Encoding;
345 }
346 return X86::DS_Encoding;
347 }
348
349 /// Check if the two instructions will be macro-fused on the target cpu.
isMacroFused(const MCInst & Cmp,const MCInst & Jcc) const350 bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const {
351 const MCInstrDesc &InstDesc = MCII->get(Jcc.getOpcode());
352 if (!InstDesc.isConditionalBranch())
353 return false;
354 if (!isFirstMacroFusibleInst(Cmp, *MCII))
355 return false;
356 const X86::FirstMacroFusionInstKind CmpKind =
357 X86::classifyFirstOpcodeInMacroFusion(Cmp.getOpcode());
358 const X86::SecondMacroFusionInstKind BranchKind =
359 classifySecondInstInMacroFusion(Jcc, *MCII);
360 return X86::isMacroFused(CmpKind, BranchKind);
361 }
362
363 /// Check if the instruction has a variant symbol operand.
hasVariantSymbol(const MCInst & MI)364 static bool hasVariantSymbol(const MCInst &MI) {
365 for (auto &Operand : MI) {
366 if (!Operand.isExpr())
367 continue;
368 const MCExpr &Expr = *Operand.getExpr();
369 if (Expr.getKind() == MCExpr::SymbolRef &&
370 cast<MCSymbolRefExpr>(Expr).getKind() != MCSymbolRefExpr::VK_None)
371 return true;
372 }
373 return false;
374 }
375
allowAutoPadding() const376 bool X86AsmBackend::allowAutoPadding() const {
377 return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone);
378 }
379
allowEnhancedRelaxation() const380 bool X86AsmBackend::allowEnhancedRelaxation() const {
381 return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign;
382 }
383
384 /// X86 has certain instructions which enable interrupts exactly one
385 /// instruction *after* the instruction which stores to SS. Return true if the
386 /// given instruction has such an interrupt delay slot.
hasInterruptDelaySlot(const MCInst & Inst)387 static bool hasInterruptDelaySlot(const MCInst &Inst) {
388 switch (Inst.getOpcode()) {
389 case X86::POPSS16:
390 case X86::POPSS32:
391 case X86::STI:
392 return true;
393
394 case X86::MOV16sr:
395 case X86::MOV32sr:
396 case X86::MOV64sr:
397 case X86::MOV16sm:
398 if (Inst.getOperand(0).getReg() == X86::SS)
399 return true;
400 break;
401 }
402 return false;
403 }
404
405 /// Check if the instruction to be emitted is right after any data.
406 static bool
isRightAfterData(MCFragment * CurrentFragment,const std::pair<MCFragment *,size_t> & PrevInstPosition)407 isRightAfterData(MCFragment *CurrentFragment,
408 const std::pair<MCFragment *, size_t> &PrevInstPosition) {
409 MCFragment *F = CurrentFragment;
410 // Empty data fragments may be created to prevent further data being
411 // added into the previous fragment, we need to skip them since they
412 // have no contents.
413 for (; isa_and_nonnull<MCDataFragment>(F); F = F->getPrevNode())
414 if (cast<MCDataFragment>(F)->getContents().size() != 0)
415 break;
416
417 // Since data is always emitted into a DataFragment, our check strategy is
418 // simple here.
419 // - If the fragment is a DataFragment
420 // - If it's not the fragment where the previous instruction is,
421 // returns true.
422 // - If it's the fragment holding the previous instruction but its
423 // size changed since the the previous instruction was emitted into
424 // it, returns true.
425 // - Otherwise returns false.
426 // - If the fragment is not a DataFragment, returns false.
427 if (auto *DF = dyn_cast_or_null<MCDataFragment>(F))
428 return DF != PrevInstPosition.first ||
429 DF->getContents().size() != PrevInstPosition.second;
430
431 return false;
432 }
433
434 /// \returns the fragment size if it has instructions, otherwise returns 0.
getSizeForInstFragment(const MCFragment * F)435 static size_t getSizeForInstFragment(const MCFragment *F) {
436 if (!F || !F->hasInstructions())
437 return 0;
438 // MCEncodedFragmentWithContents being templated makes this tricky.
439 switch (F->getKind()) {
440 default:
441 llvm_unreachable("Unknown fragment with instructions!");
442 case MCFragment::FT_Data:
443 return cast<MCDataFragment>(*F).getContents().size();
444 case MCFragment::FT_Relaxable:
445 return cast<MCRelaxableFragment>(*F).getContents().size();
446 case MCFragment::FT_CompactEncodedInst:
447 return cast<MCCompactEncodedInstFragment>(*F).getContents().size();
448 }
449 }
450
451 /// Return true if we can insert NOP or prefixes automatically before the
452 /// the instruction to be emitted.
canPadInst(const MCInst & Inst,MCObjectStreamer & OS) const453 bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const {
454 if (hasVariantSymbol(Inst))
455 // Linker may rewrite the instruction with variant symbol operand(e.g.
456 // TLSCALL).
457 return false;
458
459 if (hasInterruptDelaySlot(PrevInst))
460 // If this instruction follows an interrupt enabling instruction with a one
461 // instruction delay, inserting a nop would change behavior.
462 return false;
463
464 if (isPrefix(PrevInst, *MCII))
465 // If this instruction follows a prefix, inserting a nop/prefix would change
466 // semantic.
467 return false;
468
469 if (isPrefix(Inst, *MCII))
470 // If this instruction is a prefix, inserting a prefix would change
471 // semantic.
472 return false;
473
474 if (isRightAfterData(OS.getCurrentFragment(), PrevInstPosition))
475 // If this instruction follows any data, there is no clear
476 // instruction boundary, inserting a nop/prefix would change semantic.
477 return false;
478
479 return true;
480 }
481
canPadBranches(MCObjectStreamer & OS) const482 bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const {
483 if (!OS.getAllowAutoPadding())
484 return false;
485 assert(allowAutoPadding() && "incorrect initialization!");
486
487 // We only pad in text section.
488 if (!OS.getCurrentSectionOnly()->getKind().isText())
489 return false;
490
491 // To be Done: Currently don't deal with Bundle cases.
492 if (OS.getAssembler().isBundlingEnabled())
493 return false;
494
495 // Branches only need to be aligned in 32-bit or 64-bit mode.
496 if (!(STI.hasFeature(X86::Is64Bit) || STI.hasFeature(X86::Is32Bit)))
497 return false;
498
499 return true;
500 }
501
502 /// Check if the instruction operand needs to be aligned.
needAlign(const MCInst & Inst) const503 bool X86AsmBackend::needAlign(const MCInst &Inst) const {
504 const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
505 return (Desc.isConditionalBranch() &&
506 (AlignBranchType & X86::AlignBranchJcc)) ||
507 (Desc.isUnconditionalBranch() &&
508 (AlignBranchType & X86::AlignBranchJmp)) ||
509 (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) ||
510 (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) ||
511 (Desc.isIndirectBranch() &&
512 (AlignBranchType & X86::AlignBranchIndirect));
513 }
514
515 /// Insert BoundaryAlignFragment before instructions to align branches.
emitInstructionBegin(MCObjectStreamer & OS,const MCInst & Inst,const MCSubtargetInfo & STI)516 void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
517 const MCInst &Inst, const MCSubtargetInfo &STI) {
518 CanPadInst = canPadInst(Inst, OS);
519
520 if (!canPadBranches(OS))
521 return;
522
523 if (!isMacroFused(PrevInst, Inst))
524 // Macro fusion doesn't happen indeed, clear the pending.
525 PendingBA = nullptr;
526
527 if (!CanPadInst)
528 return;
529
530 if (PendingBA && OS.getCurrentFragment()->getPrevNode() == PendingBA) {
531 // Macro fusion actually happens and there is no other fragment inserted
532 // after the previous instruction.
533 //
534 // Do nothing here since we already inserted a BoudaryAlign fragment when
535 // we met the first instruction in the fused pair and we'll tie them
536 // together in emitInstructionEnd.
537 //
538 // Note: When there is at least one fragment, such as MCAlignFragment,
539 // inserted after the previous instruction, e.g.
540 //
541 // \code
542 // cmp %rax %rcx
543 // .align 16
544 // je .Label0
545 // \ endcode
546 //
547 // We will treat the JCC as a unfused branch although it may be fused
548 // with the CMP.
549 return;
550 }
551
552 if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) &&
553 isFirstMacroFusibleInst(Inst, *MCII))) {
554 // If we meet a unfused branch or the first instuction in a fusiable pair,
555 // insert a BoundaryAlign fragment.
556 OS.insert(PendingBA = new MCBoundaryAlignFragment(AlignBoundary, STI));
557 }
558 }
559
560 /// Set the last fragment to be aligned for the BoundaryAlignFragment.
emitInstructionEnd(MCObjectStreamer & OS,const MCInst & Inst)561 void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) {
562 PrevInst = Inst;
563 MCFragment *CF = OS.getCurrentFragment();
564 PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF));
565 if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF))
566 F->setAllowAutoPadding(CanPadInst);
567
568 if (!canPadBranches(OS))
569 return;
570
571 if (!needAlign(Inst) || !PendingBA)
572 return;
573
574 // Tie the aligned instructions into a a pending BoundaryAlign.
575 PendingBA->setLastFragment(CF);
576 PendingBA = nullptr;
577
578 // We need to ensure that further data isn't added to the current
579 // DataFragment, so that we can get the size of instructions later in
580 // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty
581 // DataFragment.
582 if (isa_and_nonnull<MCDataFragment>(CF))
583 OS.insert(new MCDataFragment());
584
585 // Update the maximum alignment on the current section if necessary.
586 MCSection *Sec = OS.getCurrentSectionOnly();
587 Sec->ensureMinAlignment(AlignBoundary);
588 }
589
getFixupKind(StringRef Name) const590 std::optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const {
591 if (STI.getTargetTriple().isOSBinFormatELF()) {
592 unsigned Type;
593 if (STI.getTargetTriple().getArch() == Triple::x86_64) {
594 Type = llvm::StringSwitch<unsigned>(Name)
595 #define ELF_RELOC(X, Y) .Case(#X, Y)
596 #include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
597 #undef ELF_RELOC
598 .Case("BFD_RELOC_NONE", ELF::R_X86_64_NONE)
599 .Case("BFD_RELOC_8", ELF::R_X86_64_8)
600 .Case("BFD_RELOC_16", ELF::R_X86_64_16)
601 .Case("BFD_RELOC_32", ELF::R_X86_64_32)
602 .Case("BFD_RELOC_64", ELF::R_X86_64_64)
603 .Default(-1u);
604 } else {
605 Type = llvm::StringSwitch<unsigned>(Name)
606 #define ELF_RELOC(X, Y) .Case(#X, Y)
607 #include "llvm/BinaryFormat/ELFRelocs/i386.def"
608 #undef ELF_RELOC
609 .Case("BFD_RELOC_NONE", ELF::R_386_NONE)
610 .Case("BFD_RELOC_8", ELF::R_386_8)
611 .Case("BFD_RELOC_16", ELF::R_386_16)
612 .Case("BFD_RELOC_32", ELF::R_386_32)
613 .Default(-1u);
614 }
615 if (Type == -1u)
616 return std::nullopt;
617 return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
618 }
619 return MCAsmBackend::getFixupKind(Name);
620 }
621
getFixupKindInfo(MCFixupKind Kind) const622 const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
623 const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
624 {"reloc_riprel_4byte", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
625 {"reloc_riprel_4byte_movq_load", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
626 {"reloc_riprel_4byte_relax", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
627 {"reloc_riprel_4byte_relax_rex", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
628 {"reloc_signed_4byte", 0, 32, 0},
629 {"reloc_signed_4byte_relax", 0, 32, 0},
630 {"reloc_global_offset_table", 0, 32, 0},
631 {"reloc_global_offset_table8", 0, 64, 0},
632 {"reloc_branch_4byte_pcrel", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
633 };
634
635 // Fixup kinds from .reloc directive are like R_386_NONE/R_X86_64_NONE. They
636 // do not require any extra processing.
637 if (Kind >= FirstLiteralRelocationKind)
638 return MCAsmBackend::getFixupKindInfo(FK_NONE);
639
640 if (Kind < FirstTargetFixupKind)
641 return MCAsmBackend::getFixupKindInfo(Kind);
642
643 assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
644 "Invalid kind!");
645 assert(Infos[Kind - FirstTargetFixupKind].Name && "Empty fixup name!");
646 return Infos[Kind - FirstTargetFixupKind];
647 }
648
shouldForceRelocation(const MCAssembler &,const MCFixup & Fixup,const MCValue &)649 bool X86AsmBackend::shouldForceRelocation(const MCAssembler &,
650 const MCFixup &Fixup,
651 const MCValue &) {
652 return Fixup.getKind() >= FirstLiteralRelocationKind;
653 }
654
getFixupKindSize(unsigned Kind)655 static unsigned getFixupKindSize(unsigned Kind) {
656 switch (Kind) {
657 default:
658 llvm_unreachable("invalid fixup kind!");
659 case FK_NONE:
660 return 0;
661 case FK_PCRel_1:
662 case FK_SecRel_1:
663 case FK_Data_1:
664 return 1;
665 case FK_PCRel_2:
666 case FK_SecRel_2:
667 case FK_Data_2:
668 return 2;
669 case FK_PCRel_4:
670 case X86::reloc_riprel_4byte:
671 case X86::reloc_riprel_4byte_relax:
672 case X86::reloc_riprel_4byte_relax_rex:
673 case X86::reloc_riprel_4byte_movq_load:
674 case X86::reloc_signed_4byte:
675 case X86::reloc_signed_4byte_relax:
676 case X86::reloc_global_offset_table:
677 case X86::reloc_branch_4byte_pcrel:
678 case FK_SecRel_4:
679 case FK_Data_4:
680 return 4;
681 case FK_PCRel_8:
682 case FK_SecRel_8:
683 case FK_Data_8:
684 case X86::reloc_global_offset_table8:
685 return 8;
686 }
687 }
688
applyFixup(const MCAssembler & Asm,const MCFixup & Fixup,const MCValue & Target,MutableArrayRef<char> Data,uint64_t Value,bool IsResolved,const MCSubtargetInfo * STI) const689 void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
690 const MCValue &Target,
691 MutableArrayRef<char> Data,
692 uint64_t Value, bool IsResolved,
693 const MCSubtargetInfo *STI) const {
694 unsigned Kind = Fixup.getKind();
695 if (Kind >= FirstLiteralRelocationKind)
696 return;
697 unsigned Size = getFixupKindSize(Kind);
698
699 assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");
700
701 int64_t SignedValue = static_cast<int64_t>(Value);
702 if ((Target.isAbsolute() || IsResolved) &&
703 getFixupKindInfo(Fixup.getKind()).Flags &
704 MCFixupKindInfo::FKF_IsPCRel) {
705 // check that PC relative fixup fits into the fixup size.
706 if (Size > 0 && !isIntN(Size * 8, SignedValue))
707 Asm.getContext().reportError(
708 Fixup.getLoc(), "value of " + Twine(SignedValue) +
709 " is too large for field of " + Twine(Size) +
710 ((Size == 1) ? " byte." : " bytes."));
711 } else {
712 // Check that uppper bits are either all zeros or all ones.
713 // Specifically ignore overflow/underflow as long as the leakage is
714 // limited to the lower bits. This is to remain compatible with
715 // other assemblers.
716 assert((Size == 0 || isIntN(Size * 8 + 1, SignedValue)) &&
717 "Value does not fit in the Fixup field");
718 }
719
720 for (unsigned i = 0; i != Size; ++i)
721 Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
722 }
723
mayNeedRelaxation(const MCInst & Inst,const MCSubtargetInfo & STI) const724 bool X86AsmBackend::mayNeedRelaxation(const MCInst &Inst,
725 const MCSubtargetInfo &STI) const {
726 // Branches can always be relaxed in either mode.
727 if (getRelaxedOpcodeBranch(Inst, false) != Inst.getOpcode())
728 return true;
729
730 // Check if this instruction is ever relaxable.
731 if (getRelaxedOpcodeArith(Inst) == Inst.getOpcode())
732 return false;
733
734
735 // Check if the relaxable operand has an expression. For the current set of
736 // relaxable instructions, the relaxable operand is always the last operand.
737 unsigned RelaxableOp = Inst.getNumOperands() - 1;
738 if (Inst.getOperand(RelaxableOp).isExpr())
739 return true;
740
741 return false;
742 }
743
fixupNeedsRelaxation(const MCFixup & Fixup,uint64_t Value,const MCRelaxableFragment * DF,const MCAsmLayout & Layout) const744 bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
745 uint64_t Value,
746 const MCRelaxableFragment *DF,
747 const MCAsmLayout &Layout) const {
748 // Relax if the value is too big for a (signed) i8.
749 return !isInt<8>(Value);
750 }
751
752 // FIXME: Can tblgen help at all here to verify there aren't other instructions
753 // we can relax?
relaxInstruction(MCInst & Inst,const MCSubtargetInfo & STI) const754 void X86AsmBackend::relaxInstruction(MCInst &Inst,
755 const MCSubtargetInfo &STI) const {
756 // The only relaxations X86 does is from a 1byte pcrel to a 4byte pcrel.
757 bool Is16BitMode = STI.getFeatureBits()[X86::Is16Bit];
758 unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode);
759
760 if (RelaxedOp == Inst.getOpcode()) {
761 SmallString<256> Tmp;
762 raw_svector_ostream OS(Tmp);
763 Inst.dump_pretty(OS);
764 OS << "\n";
765 report_fatal_error("unexpected instruction to relax: " + OS.str());
766 }
767
768 Inst.setOpcode(RelaxedOp);
769 }
770
771 /// Return true if this instruction has been fully relaxed into it's most
772 /// general available form.
isFullyRelaxed(const MCRelaxableFragment & RF)773 static bool isFullyRelaxed(const MCRelaxableFragment &RF) {
774 auto &Inst = RF.getInst();
775 auto &STI = *RF.getSubtargetInfo();
776 bool Is16BitMode = STI.getFeatureBits()[X86::Is16Bit];
777 return getRelaxedOpcode(Inst, Is16BitMode) == Inst.getOpcode();
778 }
779
padInstructionViaPrefix(MCRelaxableFragment & RF,MCCodeEmitter & Emitter,unsigned & RemainingSize) const780 bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
781 MCCodeEmitter &Emitter,
782 unsigned &RemainingSize) const {
783 if (!RF.getAllowAutoPadding())
784 return false;
785 // If the instruction isn't fully relaxed, shifting it around might require a
786 // larger value for one of the fixups then can be encoded. The outer loop
787 // will also catch this before moving to the next instruction, but we need to
788 // prevent padding this single instruction as well.
789 if (!isFullyRelaxed(RF))
790 return false;
791
792 const unsigned OldSize = RF.getContents().size();
793 if (OldSize == 15)
794 return false;
795
796 const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize);
797 const unsigned RemainingPrefixSize = [&]() -> unsigned {
798 SmallString<15> Code;
799 raw_svector_ostream VecOS(Code);
800 Emitter.emitPrefix(RF.getInst(), VecOS, STI);
801 assert(Code.size() < 15 && "The number of prefixes must be less than 15.");
802
803 // TODO: It turns out we need a decent amount of plumbing for the target
804 // specific bits to determine number of prefixes its safe to add. Various
805 // targets (older chips mostly, but also Atom family) encounter decoder
806 // stalls with too many prefixes. For testing purposes, we set the value
807 // externally for the moment.
808 unsigned ExistingPrefixSize = Code.size();
809 if (TargetPrefixMax <= ExistingPrefixSize)
810 return 0;
811 return TargetPrefixMax - ExistingPrefixSize;
812 }();
813 const unsigned PrefixBytesToAdd =
814 std::min(MaxPossiblePad, RemainingPrefixSize);
815 if (PrefixBytesToAdd == 0)
816 return false;
817
818 const uint8_t Prefix = determinePaddingPrefix(RF.getInst());
819
820 SmallString<256> Code;
821 Code.append(PrefixBytesToAdd, Prefix);
822 Code.append(RF.getContents().begin(), RF.getContents().end());
823 RF.getContents() = Code;
824
825 // Adjust the fixups for the change in offsets
826 for (auto &F : RF.getFixups()) {
827 F.setOffset(F.getOffset() + PrefixBytesToAdd);
828 }
829
830 RemainingSize -= PrefixBytesToAdd;
831 return true;
832 }
833
padInstructionViaRelaxation(MCRelaxableFragment & RF,MCCodeEmitter & Emitter,unsigned & RemainingSize) const834 bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
835 MCCodeEmitter &Emitter,
836 unsigned &RemainingSize) const {
837 if (isFullyRelaxed(RF))
838 // TODO: There are lots of other tricks we could apply for increasing
839 // encoding size without impacting performance.
840 return false;
841
842 MCInst Relaxed = RF.getInst();
843 relaxInstruction(Relaxed, *RF.getSubtargetInfo());
844
845 SmallVector<MCFixup, 4> Fixups;
846 SmallString<15> Code;
847 raw_svector_ostream VecOS(Code);
848 Emitter.encodeInstruction(Relaxed, VecOS, Fixups, *RF.getSubtargetInfo());
849 const unsigned OldSize = RF.getContents().size();
850 const unsigned NewSize = Code.size();
851 assert(NewSize >= OldSize && "size decrease during relaxation?");
852 unsigned Delta = NewSize - OldSize;
853 if (Delta > RemainingSize)
854 return false;
855 RF.setInst(Relaxed);
856 RF.getContents() = Code;
857 RF.getFixups() = Fixups;
858 RemainingSize -= Delta;
859 return true;
860 }
861
padInstructionEncoding(MCRelaxableFragment & RF,MCCodeEmitter & Emitter,unsigned & RemainingSize) const862 bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
863 MCCodeEmitter &Emitter,
864 unsigned &RemainingSize) const {
865 bool Changed = false;
866 if (RemainingSize != 0)
867 Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize);
868 if (RemainingSize != 0)
869 Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize);
870 return Changed;
871 }
872
finishLayout(MCAssembler const & Asm,MCAsmLayout & Layout) const873 void X86AsmBackend::finishLayout(MCAssembler const &Asm,
874 MCAsmLayout &Layout) const {
875 // See if we can further relax some instructions to cut down on the number of
876 // nop bytes required for code alignment. The actual win is in reducing
877 // instruction count, not number of bytes. Modern X86-64 can easily end up
878 // decode limited. It is often better to reduce the number of instructions
879 // (i.e. eliminate nops) even at the cost of increasing the size and
880 // complexity of others.
881 if (!X86PadForAlign && !X86PadForBranchAlign)
882 return;
883
884 // The processed regions are delimitered by LabeledFragments. -g may have more
885 // MCSymbols and therefore different relaxation results. X86PadForAlign is
886 // disabled by default to eliminate the -g vs non -g difference.
887 DenseSet<MCFragment *> LabeledFragments;
888 for (const MCSymbol &S : Asm.symbols())
889 LabeledFragments.insert(S.getFragment(false));
890
891 for (MCSection &Sec : Asm) {
892 if (!Sec.getKind().isText())
893 continue;
894
895 SmallVector<MCRelaxableFragment *, 4> Relaxable;
896 for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) {
897 MCFragment &F = *I;
898
899 if (LabeledFragments.count(&F))
900 Relaxable.clear();
901
902 if (F.getKind() == MCFragment::FT_Data ||
903 F.getKind() == MCFragment::FT_CompactEncodedInst)
904 // Skip and ignore
905 continue;
906
907 if (F.getKind() == MCFragment::FT_Relaxable) {
908 auto &RF = cast<MCRelaxableFragment>(*I);
909 Relaxable.push_back(&RF);
910 continue;
911 }
912
913 auto canHandle = [](MCFragment &F) -> bool {
914 switch (F.getKind()) {
915 default:
916 return false;
917 case MCFragment::FT_Align:
918 return X86PadForAlign;
919 case MCFragment::FT_BoundaryAlign:
920 return X86PadForBranchAlign;
921 }
922 };
923 // For any unhandled kind, assume we can't change layout.
924 if (!canHandle(F)) {
925 Relaxable.clear();
926 continue;
927 }
928
929 #ifndef NDEBUG
930 const uint64_t OrigOffset = Layout.getFragmentOffset(&F);
931 #endif
932 const uint64_t OrigSize = Asm.computeFragmentSize(Layout, F);
933
934 // To keep the effects local, prefer to relax instructions closest to
935 // the align directive. This is purely about human understandability
936 // of the resulting code. If we later find a reason to expand
937 // particular instructions over others, we can adjust.
938 MCFragment *FirstChangedFragment = nullptr;
939 unsigned RemainingSize = OrigSize;
940 while (!Relaxable.empty() && RemainingSize != 0) {
941 auto &RF = *Relaxable.pop_back_val();
942 // Give the backend a chance to play any tricks it wishes to increase
943 // the encoding size of the given instruction. Target independent code
944 // will try further relaxation, but target's may play further tricks.
945 if (padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize))
946 FirstChangedFragment = &RF;
947
948 // If we have an instruction which hasn't been fully relaxed, we can't
949 // skip past it and insert bytes before it. Changing its starting
950 // offset might require a larger negative offset than it can encode.
951 // We don't need to worry about larger positive offsets as none of the
952 // possible offsets between this and our align are visible, and the
953 // ones afterwards aren't changing.
954 if (!isFullyRelaxed(RF))
955 break;
956 }
957 Relaxable.clear();
958
959 if (FirstChangedFragment) {
960 // Make sure the offsets for any fragments in the effected range get
961 // updated. Note that this (conservatively) invalidates the offsets of
962 // those following, but this is not required.
963 Layout.invalidateFragmentsFrom(FirstChangedFragment);
964 }
965
966 // BoundaryAlign explicitly tracks it's size (unlike align)
967 if (F.getKind() == MCFragment::FT_BoundaryAlign)
968 cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize);
969
970 #ifndef NDEBUG
971 const uint64_t FinalOffset = Layout.getFragmentOffset(&F);
972 const uint64_t FinalSize = Asm.computeFragmentSize(Layout, F);
973 assert(OrigOffset + OrigSize == FinalOffset + FinalSize &&
974 "can't move start of next fragment!");
975 assert(FinalSize == RemainingSize && "inconsistent size computation?");
976 #endif
977
978 // If we're looking at a boundary align, make sure we don't try to pad
979 // its target instructions for some following directive. Doing so would
980 // break the alignment of the current boundary align.
981 if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(&F)) {
982 const MCFragment *LastFragment = BF->getLastFragment();
983 if (!LastFragment)
984 continue;
985 while (&*I != LastFragment)
986 ++I;
987 }
988 }
989 }
990
991 // The layout is done. Mark every fragment as valid.
992 for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
993 MCSection &Section = *Layout.getSectionOrder()[i];
994 Layout.getFragmentOffset(&*Section.getFragmentList().rbegin());
995 Asm.computeFragmentSize(Layout, *Section.getFragmentList().rbegin());
996 }
997 }
998
getMaximumNopSize(const MCSubtargetInfo & STI) const999 unsigned X86AsmBackend::getMaximumNopSize(const MCSubtargetInfo &STI) const {
1000 if (STI.hasFeature(X86::Is16Bit))
1001 return 4;
1002 if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Is64Bit))
1003 return 1;
1004 if (STI.getFeatureBits()[X86::TuningFast7ByteNOP])
1005 return 7;
1006 if (STI.getFeatureBits()[X86::TuningFast15ByteNOP])
1007 return 15;
1008 if (STI.getFeatureBits()[X86::TuningFast11ByteNOP])
1009 return 11;
1010 // FIXME: handle 32-bit mode
1011 // 15-bytes is the longest single NOP instruction, but 10-bytes is
1012 // commonly the longest that can be efficiently decoded.
1013 return 10;
1014 }
1015
1016 /// Write a sequence of optimal nops to the output, covering \p Count
1017 /// bytes.
1018 /// \return - true on success, false on failure
writeNopData(raw_ostream & OS,uint64_t Count,const MCSubtargetInfo * STI) const1019 bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
1020 const MCSubtargetInfo *STI) const {
1021
1022 // Write 1 or 2 byte NOP sequences, or a longer trapsled, until
1023 // we have written Count bytes
1024 do {
1025 const uint8_t ThisNopLength = (uint8_t) std::min(Count, (uint64_t)127);
1026 switch (ThisNopLength) {
1027 case 0: break;
1028 case 1: OS << '\x90';
1029 break;
1030 case 2: OS << '\x66';
1031 OS << '\x90';
1032 break;
1033 default: OS << '\xEB';
1034 OS << (uint8_t)(ThisNopLength - 2);
1035 for(uint8_t i = 2; i < ThisNopLength; ++i)
1036 OS << '\xCC';
1037 }
1038 Count -= ThisNopLength;
1039 } while (Count != 0);
1040
1041 return true;
1042 }
1043
1044 /* *** */
1045
1046 namespace {
1047
1048 class ELFX86AsmBackend : public X86AsmBackend {
1049 public:
1050 uint8_t OSABI;
ELFX86AsmBackend(const Target & T,uint8_t OSABI,const MCSubtargetInfo & STI)1051 ELFX86AsmBackend(const Target &T, uint8_t OSABI, const MCSubtargetInfo &STI)
1052 : X86AsmBackend(T, STI), OSABI(OSABI) {}
1053 };
1054
1055 class ELFX86_32AsmBackend : public ELFX86AsmBackend {
1056 public:
ELFX86_32AsmBackend(const Target & T,uint8_t OSABI,const MCSubtargetInfo & STI)1057 ELFX86_32AsmBackend(const Target &T, uint8_t OSABI,
1058 const MCSubtargetInfo &STI)
1059 : ELFX86AsmBackend(T, OSABI, STI) {}
1060
1061 std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const1062 createObjectTargetWriter() const override {
1063 return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, ELF::EM_386);
1064 }
1065 };
1066
1067 class ELFX86_X32AsmBackend : public ELFX86AsmBackend {
1068 public:
ELFX86_X32AsmBackend(const Target & T,uint8_t OSABI,const MCSubtargetInfo & STI)1069 ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI,
1070 const MCSubtargetInfo &STI)
1071 : ELFX86AsmBackend(T, OSABI, STI) {}
1072
1073 std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const1074 createObjectTargetWriter() const override {
1075 return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
1076 ELF::EM_X86_64);
1077 }
1078 };
1079
1080 class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend {
1081 public:
ELFX86_IAMCUAsmBackend(const Target & T,uint8_t OSABI,const MCSubtargetInfo & STI)1082 ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI,
1083 const MCSubtargetInfo &STI)
1084 : ELFX86AsmBackend(T, OSABI, STI) {}
1085
1086 std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const1087 createObjectTargetWriter() const override {
1088 return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
1089 ELF::EM_IAMCU);
1090 }
1091 };
1092
1093 class ELFX86_64AsmBackend : public ELFX86AsmBackend {
1094 public:
ELFX86_64AsmBackend(const Target & T,uint8_t OSABI,const MCSubtargetInfo & STI)1095 ELFX86_64AsmBackend(const Target &T, uint8_t OSABI,
1096 const MCSubtargetInfo &STI)
1097 : ELFX86AsmBackend(T, OSABI, STI) {}
1098
1099 std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const1100 createObjectTargetWriter() const override {
1101 return createX86ELFObjectWriter(/*IsELF64*/ true, OSABI, ELF::EM_X86_64);
1102 }
1103 };
1104
1105 class WindowsX86AsmBackend : public X86AsmBackend {
1106 bool Is64Bit;
1107
1108 public:
WindowsX86AsmBackend(const Target & T,bool is64Bit,const MCSubtargetInfo & STI)1109 WindowsX86AsmBackend(const Target &T, bool is64Bit,
1110 const MCSubtargetInfo &STI)
1111 : X86AsmBackend(T, STI)
1112 , Is64Bit(is64Bit) {
1113 }
1114
getFixupKind(StringRef Name) const1115 std::optional<MCFixupKind> getFixupKind(StringRef Name) const override {
1116 return StringSwitch<std::optional<MCFixupKind>>(Name)
1117 .Case("dir32", FK_Data_4)
1118 .Case("secrel32", FK_SecRel_4)
1119 .Case("secidx", FK_SecRel_2)
1120 .Default(MCAsmBackend::getFixupKind(Name));
1121 }
1122
1123 std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const1124 createObjectTargetWriter() const override {
1125 return createX86WinCOFFObjectWriter(Is64Bit);
1126 }
1127 };
1128
namespace CU {

/// Compact unwind encoding values: four mode values (1 to 4, stored from
/// bit 24 upwards) and two masks for the register fields.
enum CompactUnwindEncodings {
  /// [RE]BP based frame where [RE]BP is pushed on the stack immediately after
  /// the return address, then [RE]SP is moved to [RE]BP.
  UNWIND_MODE_BP_FRAME = 1 << 24,

  /// A frameless function with a small constant stack size.
  UNWIND_MODE_STACK_IMMD = 2 << 24,

  /// A frameless function with a large constant stack size.
  UNWIND_MODE_STACK_IND = 3 << 24,

  /// No compact unwind encoding is available.
  UNWIND_MODE_DWARF = 4 << 24,

  /// Mask for encoding the frame registers (low 15 bits).
  UNWIND_BP_FRAME_REGISTERS = 0x7FFF,

  /// Mask for encoding the frameless registers (low 10 bits).
  UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x3FF
};

} // namespace CU
1154
1155 class DarwinX86AsmBackend : public X86AsmBackend {
1156 const MCRegisterInfo &MRI;
1157
1158 /// Number of registers that can be saved in a compact unwind encoding.
1159 enum { CU_NUM_SAVED_REGS = 6 };
1160
1161 mutable unsigned SavedRegs[CU_NUM_SAVED_REGS];
1162 Triple TT;
1163 bool Is64Bit;
1164
1165 unsigned OffsetSize; ///< Offset of a "push" instruction.
1166 unsigned MoveInstrSize; ///< Size of a "move" instruction.
1167 unsigned StackDivide; ///< Amount to adjust stack size by.
1168 protected:
1169 /// Size of a "push" instruction for the given register.
PushInstrSize(unsigned Reg) const1170 unsigned PushInstrSize(unsigned Reg) const {
1171 switch (Reg) {
1172 case X86::EBX:
1173 case X86::ECX:
1174 case X86::EDX:
1175 case X86::EDI:
1176 case X86::ESI:
1177 case X86::EBP:
1178 case X86::RBX:
1179 case X86::RBP:
1180 return 1;
1181 case X86::R12:
1182 case X86::R13:
1183 case X86::R14:
1184 case X86::R15:
1185 return 2;
1186 }
1187 return 1;
1188 }
1189
1190 private:
1191 /// Get the compact unwind number for a given register. The number
1192 /// corresponds to the enum lists in compact_unwind_encoding.h.
getCompactUnwindRegNum(unsigned Reg) const1193 int getCompactUnwindRegNum(unsigned Reg) const {
1194 static const MCPhysReg CU32BitRegs[7] = {
1195 X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
1196 };
1197 static const MCPhysReg CU64BitRegs[] = {
1198 X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
1199 };
1200 const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
1201 for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
1202 if (*CURegs == Reg)
1203 return Idx;
1204
1205 return -1;
1206 }
1207
1208 /// Return the registers encoded for a compact encoding with a frame
1209 /// pointer.
encodeCompactUnwindRegistersWithFrame() const1210 uint32_t encodeCompactUnwindRegistersWithFrame() const {
1211 // Encode the registers in the order they were saved --- 3-bits per
1212 // register. The list of saved registers is assumed to be in reverse
1213 // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS.
1214 uint32_t RegEnc = 0;
1215 for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) {
1216 unsigned Reg = SavedRegs[i];
1217 if (Reg == 0) break;
1218
1219 int CURegNum = getCompactUnwindRegNum(Reg);
1220 if (CURegNum == -1) return ~0U;
1221
1222 // Encode the 3-bit register number in order, skipping over 3-bits for
1223 // each register.
1224 RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
1225 }
1226
1227 assert((RegEnc & 0x3FFFF) == RegEnc &&
1228 "Invalid compact register encoding!");
1229 return RegEnc;
1230 }
1231
1232 /// Create the permutation encoding used with frameless stacks. It is
1233 /// passed the number of registers to be saved and an array of the registers
1234 /// saved.
encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const1235 uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const {
1236 // The saved registers are numbered from 1 to 6. In order to encode the
1237 // order in which they were saved, we re-number them according to their
1238 // place in the register order. The re-numbering is relative to the last
1239 // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in
1240 // that order:
1241 //
1242 // Orig Re-Num
1243 // ---- ------
1244 // 6 6
1245 // 2 2
1246 // 4 3
1247 // 5 3
1248 //
1249 for (unsigned i = 0; i < RegCount; ++i) {
1250 int CUReg = getCompactUnwindRegNum(SavedRegs[i]);
1251 if (CUReg == -1) return ~0U;
1252 SavedRegs[i] = CUReg;
1253 }
1254
1255 // Reverse the list.
1256 std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]);
1257
1258 uint32_t RenumRegs[CU_NUM_SAVED_REGS];
1259 for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i){
1260 unsigned Countless = 0;
1261 for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
1262 if (SavedRegs[j] < SavedRegs[i])
1263 ++Countless;
1264
1265 RenumRegs[i] = SavedRegs[i] - Countless - 1;
1266 }
1267
1268 // Take the renumbered values and encode them into a 10-bit number.
1269 uint32_t permutationEncoding = 0;
1270 switch (RegCount) {
1271 case 6:
1272 permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
1273 + 6 * RenumRegs[2] + 2 * RenumRegs[3]
1274 + RenumRegs[4];
1275 break;
1276 case 5:
1277 permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
1278 + 6 * RenumRegs[3] + 2 * RenumRegs[4]
1279 + RenumRegs[5];
1280 break;
1281 case 4:
1282 permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3]
1283 + 3 * RenumRegs[4] + RenumRegs[5];
1284 break;
1285 case 3:
1286 permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4]
1287 + RenumRegs[5];
1288 break;
1289 case 2:
1290 permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5];
1291 break;
1292 case 1:
1293 permutationEncoding |= RenumRegs[5];
1294 break;
1295 }
1296
1297 assert((permutationEncoding & 0x3FF) == permutationEncoding &&
1298 "Invalid compact register encoding!");
1299 return permutationEncoding;
1300 }
1301
1302 public:
DarwinX86AsmBackend(const Target & T,const MCRegisterInfo & MRI,const MCSubtargetInfo & STI)1303 DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI,
1304 const MCSubtargetInfo &STI)
1305 : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()),
1306 Is64Bit(TT.isArch64Bit()) {
1307 memset(SavedRegs, 0, sizeof(SavedRegs));
1308 OffsetSize = Is64Bit ? 8 : 4;
1309 MoveInstrSize = Is64Bit ? 3 : 2;
1310 StackDivide = Is64Bit ? 8 : 4;
1311 }
1312
1313 std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const1314 createObjectTargetWriter() const override {
1315 uint32_t CPUType = cantFail(MachO::getCPUType(TT));
1316 uint32_t CPUSubType = cantFail(MachO::getCPUSubType(TT));
1317 return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubType);
1318 }
1319
1320 /// Implementation of algorithm to generate the compact unwind encoding
1321 /// for the CFI instructions.
1322 uint32_t
generateCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs) const1323 generateCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs) const override {
1324 if (Instrs.empty()) return 0;
1325
1326 // Reset the saved registers.
1327 unsigned SavedRegIdx = 0;
1328 memset(SavedRegs, 0, sizeof(SavedRegs));
1329
1330 bool HasFP = false;
1331
1332 // Encode that we are using EBP/RBP as the frame pointer.
1333 uint32_t CompactUnwindEncoding = 0;
1334
1335 unsigned SubtractInstrIdx = Is64Bit ? 3 : 2;
1336 unsigned InstrOffset = 0;
1337 unsigned StackAdjust = 0;
1338 unsigned StackSize = 0;
1339 int MinAbsOffset = std::numeric_limits<int>::max();
1340
1341 for (const MCCFIInstruction &Inst : Instrs) {
1342 switch (Inst.getOperation()) {
1343 default:
1344 // Any other CFI directives indicate a frame that we aren't prepared
1345 // to represent via compact unwind, so just bail out.
1346 return CU::UNWIND_MODE_DWARF;
1347 case MCCFIInstruction::OpDefCfaRegister: {
1348 // Defines a frame pointer. E.g.
1349 //
1350 // movq %rsp, %rbp
1351 // L0:
1352 // .cfi_def_cfa_register %rbp
1353 //
1354 HasFP = true;
1355
1356 // If the frame pointer is other than esp/rsp, we do not have a way to
1357 // generate a compact unwinding representation, so bail out.
1358 if (*MRI.getLLVMRegNum(Inst.getRegister(), true) !=
1359 (Is64Bit ? X86::RBP : X86::EBP))
1360 return CU::UNWIND_MODE_DWARF;
1361
1362 // Reset the counts.
1363 memset(SavedRegs, 0, sizeof(SavedRegs));
1364 StackAdjust = 0;
1365 SavedRegIdx = 0;
1366 MinAbsOffset = std::numeric_limits<int>::max();
1367 InstrOffset += MoveInstrSize;
1368 break;
1369 }
1370 case MCCFIInstruction::OpDefCfaOffset: {
1371 // Defines a new offset for the CFA. E.g.
1372 //
1373 // With frame:
1374 //
1375 // pushq %rbp
1376 // L0:
1377 // .cfi_def_cfa_offset 16
1378 //
1379 // Without frame:
1380 //
1381 // subq $72, %rsp
1382 // L0:
1383 // .cfi_def_cfa_offset 80
1384 //
1385 StackSize = Inst.getOffset() / StackDivide;
1386 break;
1387 }
1388 case MCCFIInstruction::OpOffset: {
1389 // Defines a "push" of a callee-saved register. E.g.
1390 //
1391 // pushq %r15
1392 // pushq %r14
1393 // pushq %rbx
1394 // L0:
1395 // subq $120, %rsp
1396 // L1:
1397 // .cfi_offset %rbx, -40
1398 // .cfi_offset %r14, -32
1399 // .cfi_offset %r15, -24
1400 //
1401 if (SavedRegIdx == CU_NUM_SAVED_REGS)
1402 // If there are too many saved registers, we cannot use a compact
1403 // unwind encoding.
1404 return CU::UNWIND_MODE_DWARF;
1405
1406 unsigned Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true);
1407 SavedRegs[SavedRegIdx++] = Reg;
1408 StackAdjust += OffsetSize;
1409 MinAbsOffset = std::min(MinAbsOffset, abs(Inst.getOffset()));
1410 InstrOffset += PushInstrSize(Reg);
1411 break;
1412 }
1413 }
1414 }
1415
1416 StackAdjust /= StackDivide;
1417
1418 if (HasFP) {
1419 if ((StackAdjust & 0xFF) != StackAdjust)
1420 // Offset was too big for a compact unwind encoding.
1421 return CU::UNWIND_MODE_DWARF;
1422
1423 // We don't attempt to track a real StackAdjust, so if the saved registers
1424 // aren't adjacent to rbp we can't cope.
1425 if (SavedRegIdx != 0 && MinAbsOffset != 3 * (int)OffsetSize)
1426 return CU::UNWIND_MODE_DWARF;
1427
1428 // Get the encoding of the saved registers when we have a frame pointer.
1429 uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame();
1430 if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
1431
1432 CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME;
1433 CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
1434 CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
1435 } else {
1436 SubtractInstrIdx += InstrOffset;
1437 ++StackAdjust;
1438
1439 if ((StackSize & 0xFF) == StackSize) {
1440 // Frameless stack with a small stack size.
1441 CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD;
1442
1443 // Encode the stack size.
1444 CompactUnwindEncoding |= (StackSize & 0xFF) << 16;
1445 } else {
1446 if ((StackAdjust & 0x7) != StackAdjust)
1447 // The extra stack adjustments are too big for us to handle.
1448 return CU::UNWIND_MODE_DWARF;
1449
1450 // Frameless stack with an offset too large for us to encode compactly.
1451 CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND;
1452
1453 // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
1454 // instruction.
1455 CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;
1456
1457 // Encode any extra stack adjustments (done via push instructions).
1458 CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
1459 }
1460
1461 // Encode the number of registers saved. (Reverse the list first.)
1462 std::reverse(&SavedRegs[0], &SavedRegs[SavedRegIdx]);
1463 CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;
1464
1465 // Get the encoding of the saved registers when we don't have a frame
1466 // pointer.
1467 uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegIdx);
1468 if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
1469
1470 // Encode the register encoding.
1471 CompactUnwindEncoding |=
1472 RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION;
1473 }
1474
1475 return CompactUnwindEncoding;
1476 }
1477 };
1478
1479 } // end anonymous namespace
1480
createX86_32AsmBackend(const Target & T,const MCSubtargetInfo & STI,const MCRegisterInfo & MRI,const MCTargetOptions & Options)1481 MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
1482 const MCSubtargetInfo &STI,
1483 const MCRegisterInfo &MRI,
1484 const MCTargetOptions &Options) {
1485 const Triple &TheTriple = STI.getTargetTriple();
1486 if (TheTriple.isOSBinFormatMachO())
1487 return new DarwinX86AsmBackend(T, MRI, STI);
1488
1489 if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
1490 return new WindowsX86AsmBackend(T, false, STI);
1491
1492 uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
1493
1494 if (TheTriple.isOSIAMCU())
1495 return new ELFX86_IAMCUAsmBackend(T, OSABI, STI);
1496
1497 return new ELFX86_32AsmBackend(T, OSABI, STI);
1498 }
1499
createX86_64AsmBackend(const Target & T,const MCSubtargetInfo & STI,const MCRegisterInfo & MRI,const MCTargetOptions & Options)1500 MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
1501 const MCSubtargetInfo &STI,
1502 const MCRegisterInfo &MRI,
1503 const MCTargetOptions &Options) {
1504 const Triple &TheTriple = STI.getTargetTriple();
1505 if (TheTriple.isOSBinFormatMachO())
1506 return new DarwinX86AsmBackend(T, MRI, STI);
1507
1508 if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
1509 return new WindowsX86AsmBackend(T, true, STI);
1510
1511 uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
1512
1513 if (TheTriple.isX32())
1514 return new ELFX86_X32AsmBackend(T, OSABI, STI);
1515 return new ELFX86_64AsmBackend(T, OSABI, STI);
1516 }
1517