1 //===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "MCTargetDesc/X86BaseInfo.h"
10 #include "MCTargetDesc/X86FixupKinds.h"
11 #include "MCTargetDesc/X86EncodingOptimization.h"
12 #include "llvm/ADT/StringSwitch.h"
13 #include "llvm/BinaryFormat/ELF.h"
14 #include "llvm/BinaryFormat/MachO.h"
15 #include "llvm/MC/MCAsmBackend.h"
16 #include "llvm/MC/MCAsmLayout.h"
17 #include "llvm/MC/MCAssembler.h"
18 #include "llvm/MC/MCCodeEmitter.h"
19 #include "llvm/MC/MCContext.h"
20 #include "llvm/MC/MCDwarf.h"
21 #include "llvm/MC/MCELFObjectWriter.h"
22 #include "llvm/MC/MCExpr.h"
23 #include "llvm/MC/MCFixupKindInfo.h"
24 #include "llvm/MC/MCInst.h"
25 #include "llvm/MC/MCInstrInfo.h"
26 #include "llvm/MC/MCMachObjectWriter.h"
27 #include "llvm/MC/MCObjectStreamer.h"
28 #include "llvm/MC/MCObjectWriter.h"
29 #include "llvm/MC/MCRegisterInfo.h"
30 #include "llvm/MC/MCSectionMachO.h"
31 #include "llvm/MC/MCSubtargetInfo.h"
32 #include "llvm/MC/MCValue.h"
33 #include "llvm/MC/TargetRegistry.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/ErrorHandling.h"
36 #include "llvm/Support/raw_ostream.h"
37
38 using namespace llvm;
39
40 namespace {
41 /// A wrapper for holding a mask of the values from X86::AlignBranchBoundaryKind
42 class X86AlignBranchKind {
43 private:
44 uint8_t AlignBranchKind = 0;
45
46 public:
operator =(const std::string & Val)47 void operator=(const std::string &Val) {
48 if (Val.empty())
49 return;
50 SmallVector<StringRef, 6> BranchTypes;
51 StringRef(Val).split(BranchTypes, '+', -1, false);
52 for (auto BranchType : BranchTypes) {
53 if (BranchType == "fused")
54 addKind(X86::AlignBranchFused);
55 else if (BranchType == "jcc")
56 addKind(X86::AlignBranchJcc);
57 else if (BranchType == "jmp")
58 addKind(X86::AlignBranchJmp);
59 else if (BranchType == "call")
60 addKind(X86::AlignBranchCall);
61 else if (BranchType == "ret")
62 addKind(X86::AlignBranchRet);
63 else if (BranchType == "indirect")
64 addKind(X86::AlignBranchIndirect);
65 else {
66 errs() << "invalid argument " << BranchType.str()
67 << " to -x86-align-branch=; each element must be one of: fused, "
68 "jcc, jmp, call, ret, indirect.(plus separated)\n";
69 }
70 }
71 }
72
operator uint8_t() const73 operator uint8_t() const { return AlignBranchKind; }
addKind(X86::AlignBranchBoundaryKind Value)74 void addKind(X86::AlignBranchBoundaryKind Value) { AlignBranchKind |= Value; }
75 };
76
// Backing storage for the -x86-align-branch option; the option declared below
// is bound to this object through cl::location.
X86AlignBranchKind X86AlignBranchKindLoc;

// -x86-align-branch-boundary: size of the boundary that selected branches are
// aligned against. Defaults to 0.
cl::opt<unsigned> X86AlignBranchBoundary(
    "x86-align-branch-boundary", cl::init(0),
    cl::desc(
        "Control how the assembler should align branches with NOP. If the "
        "boundary's size is not 0, it should be a power of 2 and no less "
        "than 32. Branches will be aligned to prevent from being across or "
        "against the boundary of specified size. The default value 0 does not "
        "align branches."));

// -x86-align-branch: '+'-separated list of branch kinds to align. The string
// is parsed by X86AlignBranchKind::operator= into X86AlignBranchKindLoc.
cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch(
    "x86-align-branch",
    cl::desc(
        "Specify types of branches to align (plus separated list of types):"
        "\njcc indicates conditional jumps"
        "\nfused indicates fused conditional jumps"
        "\njmp indicates direct unconditional jumps"
        "\ncall indicates direct and indirect calls"
        "\nret indicates rets"
        "\nindirect indicates indirect unconditional jumps"),
    cl::location(X86AlignBranchKindLoc));

// -x86-branches-within-32B-boundaries: convenience switch; the X86AsmBackend
// constructor turns it into a 32-byte boundary plus the fused/jcc/jmp kinds.
cl::opt<bool> X86AlignBranchWithin32BBoundaries(
    "x86-branches-within-32B-boundaries", cl::init(false),
    cl::desc(
        "Align selected instructions to mitigate negative performance impact "
        "of Intel's micro code update for errata skx102. May break "
        "assumptions about labels corresponding to particular instructions, "
        "and should be used with caution."));

// -x86-pad-max-prefix-size: copied into X86AsmBackend::TargetPrefixMax when
// given on the command line.
cl::opt<unsigned> X86PadMaxPrefixSize(
    "x86-pad-max-prefix-size", cl::init(0),
    cl::desc("Maximum number of prefixes to use for padding"));

// -x86-pad-for-align (hidden, off by default).
cl::opt<bool> X86PadForAlign(
    "x86-pad-for-align", cl::init(false), cl::Hidden,
    cl::desc("Pad previous instructions to implement align directives"));

// -x86-pad-for-branch-align (hidden, on by default); consulted by
// X86AsmBackend::allowEnhancedRelaxation.
cl::opt<bool> X86PadForBranchAlign(
    "x86-pad-for-branch-align", cl::init(true), cl::Hidden,
    cl::desc("Pad previous instructions to implement branch alignment"));
119
/// X86 implementation of MCAsmBackend. Besides fixup and relaxation handling,
/// it carries the state used to pad instructions so that selected branches are
/// aligned (see emitInstructionBegin / emitInstructionEnd).
class X86AsmBackend : public MCAsmBackend {
  // Subtarget the backend was created with.
  const MCSubtargetInfo &STI;
  // Instruction info created from the Target in the constructor.
  std::unique_ptr<const MCInstrInfo> MCII;
  // Mask of branch kinds that should be aligned.
  X86AlignBranchKind AlignBranchType;
  // Boundary that aligned branches are padded against.
  Align AlignBoundary;
  // Limit on prefixes per instruction when padding with prefixes; stays 0
  // unless -x86-pad-max-prefix-size is given.
  unsigned TargetPrefixMax = 0;

  // Most recently emitted instruction (recorded in emitInstructionEnd).
  MCInst PrevInst;
  // BoundaryAlign fragment inserted by emitInstructionBegin that has not yet
  // been tied to its last fragment.
  MCBoundaryAlignFragment *PendingBA = nullptr;
  // Fragment that was current when PrevInst finished emitting, and that
  // fragment's size at that moment.
  std::pair<MCFragment *, size_t> PrevInstPosition;
  // Result of canPadInst() for the instruction currently being emitted.
  bool CanPadInst = false;

  uint8_t determinePaddingPrefix(const MCInst &Inst) const;
  bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
  bool needAlign(const MCInst &Inst) const;
  bool canPadBranches(MCObjectStreamer &OS) const;
  bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const;

public:
  /// Build the backend and derive the branch-alignment configuration from the
  /// command-line options: the 32B-boundaries switch supplies defaults, and
  /// the individual options override them when explicitly given.
  X86AsmBackend(const Target &T, const MCSubtargetInfo &STI)
      : MCAsmBackend(llvm::endianness::little), STI(STI),
        MCII(T.createMCInstrInfo()) {
    if (X86AlignBranchWithin32BBoundaries) {
      // At the moment, this defaults to aligning fused branches, unconditional
      // jumps, and (unfused) conditional jumps with nops. Both the
      // instructions aligned and the alignment method (nop vs prefix) may
      // change in the future.
      AlignBoundary = assumeAligned(32);
      AlignBranchType.addKind(X86::AlignBranchFused);
      AlignBranchType.addKind(X86::AlignBranchJcc);
      AlignBranchType.addKind(X86::AlignBranchJmp);
    }
    // Allow overriding defaults set by main flag
    if (X86AlignBranchBoundary.getNumOccurrences())
      AlignBoundary = assumeAligned(X86AlignBranchBoundary);
    if (X86AlignBranch.getNumOccurrences())
      AlignBranchType = X86AlignBranchKindLoc;
    if (X86PadMaxPrefixSize.getNumOccurrences())
      TargetPrefixMax = X86PadMaxPrefixSize;
  }

  // Branch-alignment hooks.
  bool allowAutoPadding() const override;
  bool allowEnhancedRelaxation() const override;
  void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst,
                            const MCSubtargetInfo &STI) override;
  void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) override;

  /// Number of X86-specific fixup kinds.
  unsigned getNumFixupKinds() const override {
    return X86::NumTargetFixupKinds;
  }

  std::optional<MCFixupKind> getFixupKind(StringRef Name) const override;

  const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;

  bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
                             const MCValue &Target,
                             const MCSubtargetInfo *STI) override;

  void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
                  const MCValue &Target, MutableArrayRef<char> Data,
                  uint64_t Value, bool IsResolved,
                  const MCSubtargetInfo *STI) const override;

  bool mayNeedRelaxation(const MCInst &Inst,
                         const MCSubtargetInfo &STI) const override;

  bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
                            const MCRelaxableFragment *DF,
                            const MCAsmLayout &Layout) const override;

  void relaxInstruction(MCInst &Inst,
                        const MCSubtargetInfo &STI) const override;

  // Helpers that grow a relaxable fragment's encoding by up to RemainingSize
  // bytes; each one subtracts the bytes it added from RemainingSize.
  bool padInstructionViaRelaxation(MCRelaxableFragment &RF,
                                   MCCodeEmitter &Emitter,
                                   unsigned &RemainingSize) const;

  bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
                               unsigned &RemainingSize) const;

  bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
                              unsigned &RemainingSize) const;

  void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const override;

  unsigned getMaximumNopSize(const MCSubtargetInfo &STI) const override;

  bool writeNopData(raw_ostream &OS, uint64_t Count,
                    const MCSubtargetInfo *STI) const override;
};
211 } // end anonymous namespace
212
isRelaxableBranch(unsigned Opcode)213 static bool isRelaxableBranch(unsigned Opcode) {
214 return Opcode == X86::JCC_1 || Opcode == X86::JMP_1;
215 }
216
getRelaxedOpcodeBranch(unsigned Opcode,bool Is16BitMode=false)217 static unsigned getRelaxedOpcodeBranch(unsigned Opcode,
218 bool Is16BitMode = false) {
219 switch (Opcode) {
220 default:
221 llvm_unreachable("invalid opcode for branch");
222 case X86::JCC_1:
223 return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4;
224 case X86::JMP_1:
225 return (Is16BitMode) ? X86::JMP_2 : X86::JMP_4;
226 }
227 }
228
getRelaxedOpcode(const MCInst & MI,bool Is16BitMode)229 static unsigned getRelaxedOpcode(const MCInst &MI, bool Is16BitMode) {
230 unsigned Opcode = MI.getOpcode();
231 return isRelaxableBranch(Opcode) ? getRelaxedOpcodeBranch(Opcode, Is16BitMode)
232 : X86::getOpcodeForLongImmediateForm(Opcode);
233 }
234
getCondFromBranch(const MCInst & MI,const MCInstrInfo & MCII)235 static X86::CondCode getCondFromBranch(const MCInst &MI,
236 const MCInstrInfo &MCII) {
237 unsigned Opcode = MI.getOpcode();
238 switch (Opcode) {
239 default:
240 return X86::COND_INVALID;
241 case X86::JCC_1: {
242 const MCInstrDesc &Desc = MCII.get(Opcode);
243 return static_cast<X86::CondCode>(
244 MI.getOperand(Desc.getNumOperands() - 1).getImm());
245 }
246 }
247 }
248
249 static X86::SecondMacroFusionInstKind
classifySecondInstInMacroFusion(const MCInst & MI,const MCInstrInfo & MCII)250 classifySecondInstInMacroFusion(const MCInst &MI, const MCInstrInfo &MCII) {
251 X86::CondCode CC = getCondFromBranch(MI, MCII);
252 return classifySecondCondCodeInMacroFusion(CC);
253 }
254
255 /// Check if the instruction uses RIP relative addressing.
isRIPRelative(const MCInst & MI,const MCInstrInfo & MCII)256 static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) {
257 unsigned Opcode = MI.getOpcode();
258 const MCInstrDesc &Desc = MCII.get(Opcode);
259 uint64_t TSFlags = Desc.TSFlags;
260 unsigned CurOp = X86II::getOperandBias(Desc);
261 int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
262 if (MemoryOperand < 0)
263 return false;
264 unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg;
265 unsigned BaseReg = MI.getOperand(BaseRegNum).getReg();
266 return (BaseReg == X86::RIP);
267 }
268
269 /// Check if the instruction is a prefix.
isPrefix(const MCInst & MI,const MCInstrInfo & MCII)270 static bool isPrefix(const MCInst &MI, const MCInstrInfo &MCII) {
271 return X86II::isPrefix(MCII.get(MI.getOpcode()).TSFlags);
272 }
273
274 /// Check if the instruction is valid as the first instruction in macro fusion.
isFirstMacroFusibleInst(const MCInst & Inst,const MCInstrInfo & MCII)275 static bool isFirstMacroFusibleInst(const MCInst &Inst,
276 const MCInstrInfo &MCII) {
277 // An Intel instruction with RIP relative addressing is not macro fusible.
278 if (isRIPRelative(Inst, MCII))
279 return false;
280 X86::FirstMacroFusionInstKind FIK =
281 X86::classifyFirstOpcodeInMacroFusion(Inst.getOpcode());
282 return FIK != X86::FirstMacroFusionInstKind::Invalid;
283 }
284
285 /// X86 can reduce the bytes of NOP by padding instructions with prefixes to
286 /// get a better peformance in some cases. Here, we determine which prefix is
287 /// the most suitable.
288 ///
289 /// If the instruction has a segment override prefix, use the existing one.
290 /// If the target is 64-bit, use the CS.
291 /// If the target is 32-bit,
292 /// - If the instruction has a ESP/EBP base register, use SS.
293 /// - Otherwise use DS.
determinePaddingPrefix(const MCInst & Inst) const294 uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const {
295 assert((STI.hasFeature(X86::Is32Bit) || STI.hasFeature(X86::Is64Bit)) &&
296 "Prefixes can be added only in 32-bit or 64-bit mode.");
297 const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
298 uint64_t TSFlags = Desc.TSFlags;
299
300 // Determine where the memory operand starts, if present.
301 int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
302 if (MemoryOperand != -1)
303 MemoryOperand += X86II::getOperandBias(Desc);
304
305 unsigned SegmentReg = 0;
306 if (MemoryOperand >= 0) {
307 // Check for explicit segment override on memory operand.
308 SegmentReg = Inst.getOperand(MemoryOperand + X86::AddrSegmentReg).getReg();
309 }
310
311 switch (TSFlags & X86II::FormMask) {
312 default:
313 break;
314 case X86II::RawFrmDstSrc: {
315 // Check segment override opcode prefix as needed (not for %ds).
316 if (Inst.getOperand(2).getReg() != X86::DS)
317 SegmentReg = Inst.getOperand(2).getReg();
318 break;
319 }
320 case X86II::RawFrmSrc: {
321 // Check segment override opcode prefix as needed (not for %ds).
322 if (Inst.getOperand(1).getReg() != X86::DS)
323 SegmentReg = Inst.getOperand(1).getReg();
324 break;
325 }
326 case X86II::RawFrmMemOffs: {
327 // Check segment override opcode prefix as needed.
328 SegmentReg = Inst.getOperand(1).getReg();
329 break;
330 }
331 }
332
333 if (SegmentReg != 0)
334 return X86::getSegmentOverridePrefixForReg(SegmentReg);
335
336 if (STI.hasFeature(X86::Is64Bit))
337 return X86::CS_Encoding;
338
339 if (MemoryOperand >= 0) {
340 unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg;
341 unsigned BaseReg = Inst.getOperand(BaseRegNum).getReg();
342 if (BaseReg == X86::ESP || BaseReg == X86::EBP)
343 return X86::SS_Encoding;
344 }
345 return X86::DS_Encoding;
346 }
347
348 /// Check if the two instructions will be macro-fused on the target cpu.
isMacroFused(const MCInst & Cmp,const MCInst & Jcc) const349 bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const {
350 const MCInstrDesc &InstDesc = MCII->get(Jcc.getOpcode());
351 if (!InstDesc.isConditionalBranch())
352 return false;
353 if (!isFirstMacroFusibleInst(Cmp, *MCII))
354 return false;
355 const X86::FirstMacroFusionInstKind CmpKind =
356 X86::classifyFirstOpcodeInMacroFusion(Cmp.getOpcode());
357 const X86::SecondMacroFusionInstKind BranchKind =
358 classifySecondInstInMacroFusion(Jcc, *MCII);
359 return X86::isMacroFused(CmpKind, BranchKind);
360 }
361
362 /// Check if the instruction has a variant symbol operand.
hasVariantSymbol(const MCInst & MI)363 static bool hasVariantSymbol(const MCInst &MI) {
364 for (auto &Operand : MI) {
365 if (!Operand.isExpr())
366 continue;
367 const MCExpr &Expr = *Operand.getExpr();
368 if (Expr.getKind() == MCExpr::SymbolRef &&
369 cast<MCSymbolRefExpr>(Expr).getKind() != MCSymbolRefExpr::VK_None)
370 return true;
371 }
372 return false;
373 }
374
allowAutoPadding() const375 bool X86AsmBackend::allowAutoPadding() const {
376 return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone);
377 }
378
allowEnhancedRelaxation() const379 bool X86AsmBackend::allowEnhancedRelaxation() const {
380 return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign;
381 }
382
383 /// X86 has certain instructions which enable interrupts exactly one
384 /// instruction *after* the instruction which stores to SS. Return true if the
385 /// given instruction has such an interrupt delay slot.
hasInterruptDelaySlot(const MCInst & Inst)386 static bool hasInterruptDelaySlot(const MCInst &Inst) {
387 switch (Inst.getOpcode()) {
388 case X86::POPSS16:
389 case X86::POPSS32:
390 case X86::STI:
391 return true;
392
393 case X86::MOV16sr:
394 case X86::MOV32sr:
395 case X86::MOV64sr:
396 case X86::MOV16sm:
397 if (Inst.getOperand(0).getReg() == X86::SS)
398 return true;
399 break;
400 }
401 return false;
402 }
403
404 /// Check if the instruction to be emitted is right after any data.
405 static bool
isRightAfterData(MCFragment * CurrentFragment,const std::pair<MCFragment *,size_t> & PrevInstPosition)406 isRightAfterData(MCFragment *CurrentFragment,
407 const std::pair<MCFragment *, size_t> &PrevInstPosition) {
408 MCFragment *F = CurrentFragment;
409 // Empty data fragments may be created to prevent further data being
410 // added into the previous fragment, we need to skip them since they
411 // have no contents.
412 for (; isa_and_nonnull<MCDataFragment>(F); F = F->getPrevNode())
413 if (cast<MCDataFragment>(F)->getContents().size() != 0)
414 break;
415
416 // Since data is always emitted into a DataFragment, our check strategy is
417 // simple here.
418 // - If the fragment is a DataFragment
419 // - If it's not the fragment where the previous instruction is,
420 // returns true.
421 // - If it's the fragment holding the previous instruction but its
422 // size changed since the previous instruction was emitted into
423 // it, returns true.
424 // - Otherwise returns false.
425 // - If the fragment is not a DataFragment, returns false.
426 if (auto *DF = dyn_cast_or_null<MCDataFragment>(F))
427 return DF != PrevInstPosition.first ||
428 DF->getContents().size() != PrevInstPosition.second;
429
430 return false;
431 }
432
433 /// \returns the fragment size if it has instructions, otherwise returns 0.
getSizeForInstFragment(const MCFragment * F)434 static size_t getSizeForInstFragment(const MCFragment *F) {
435 if (!F || !F->hasInstructions())
436 return 0;
437 // MCEncodedFragmentWithContents being templated makes this tricky.
438 switch (F->getKind()) {
439 default:
440 llvm_unreachable("Unknown fragment with instructions!");
441 case MCFragment::FT_Data:
442 return cast<MCDataFragment>(*F).getContents().size();
443 case MCFragment::FT_Relaxable:
444 return cast<MCRelaxableFragment>(*F).getContents().size();
445 case MCFragment::FT_CompactEncodedInst:
446 return cast<MCCompactEncodedInstFragment>(*F).getContents().size();
447 }
448 }
449
450 /// Return true if we can insert NOP or prefixes automatically before the
451 /// the instruction to be emitted.
canPadInst(const MCInst & Inst,MCObjectStreamer & OS) const452 bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const {
453 if (hasVariantSymbol(Inst))
454 // Linker may rewrite the instruction with variant symbol operand(e.g.
455 // TLSCALL).
456 return false;
457
458 if (hasInterruptDelaySlot(PrevInst))
459 // If this instruction follows an interrupt enabling instruction with a one
460 // instruction delay, inserting a nop would change behavior.
461 return false;
462
463 if (isPrefix(PrevInst, *MCII))
464 // If this instruction follows a prefix, inserting a nop/prefix would change
465 // semantic.
466 return false;
467
468 if (isPrefix(Inst, *MCII))
469 // If this instruction is a prefix, inserting a prefix would change
470 // semantic.
471 return false;
472
473 if (isRightAfterData(OS.getCurrentFragment(), PrevInstPosition))
474 // If this instruction follows any data, there is no clear
475 // instruction boundary, inserting a nop/prefix would change semantic.
476 return false;
477
478 return true;
479 }
480
canPadBranches(MCObjectStreamer & OS) const481 bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const {
482 if (!OS.getAllowAutoPadding())
483 return false;
484 assert(allowAutoPadding() && "incorrect initialization!");
485
486 // We only pad in text section.
487 if (!OS.getCurrentSectionOnly()->getKind().isText())
488 return false;
489
490 // To be Done: Currently don't deal with Bundle cases.
491 if (OS.getAssembler().isBundlingEnabled())
492 return false;
493
494 // Branches only need to be aligned in 32-bit or 64-bit mode.
495 if (!(STI.hasFeature(X86::Is64Bit) || STI.hasFeature(X86::Is32Bit)))
496 return false;
497
498 return true;
499 }
500
501 /// Check if the instruction operand needs to be aligned.
needAlign(const MCInst & Inst) const502 bool X86AsmBackend::needAlign(const MCInst &Inst) const {
503 const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
504 return (Desc.isConditionalBranch() &&
505 (AlignBranchType & X86::AlignBranchJcc)) ||
506 (Desc.isUnconditionalBranch() &&
507 (AlignBranchType & X86::AlignBranchJmp)) ||
508 (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) ||
509 (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) ||
510 (Desc.isIndirectBranch() &&
511 (AlignBranchType & X86::AlignBranchIndirect));
512 }
513
514 /// Insert BoundaryAlignFragment before instructions to align branches.
emitInstructionBegin(MCObjectStreamer & OS,const MCInst & Inst,const MCSubtargetInfo & STI)515 void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
516 const MCInst &Inst, const MCSubtargetInfo &STI) {
517 CanPadInst = canPadInst(Inst, OS);
518
519 if (!canPadBranches(OS))
520 return;
521
522 if (!isMacroFused(PrevInst, Inst))
523 // Macro fusion doesn't happen indeed, clear the pending.
524 PendingBA = nullptr;
525
526 if (!CanPadInst)
527 return;
528
529 if (PendingBA && OS.getCurrentFragment()->getPrevNode() == PendingBA) {
530 // Macro fusion actually happens and there is no other fragment inserted
531 // after the previous instruction.
532 //
533 // Do nothing here since we already inserted a BoudaryAlign fragment when
534 // we met the first instruction in the fused pair and we'll tie them
535 // together in emitInstructionEnd.
536 //
537 // Note: When there is at least one fragment, such as MCAlignFragment,
538 // inserted after the previous instruction, e.g.
539 //
540 // \code
541 // cmp %rax %rcx
542 // .align 16
543 // je .Label0
544 // \ endcode
545 //
546 // We will treat the JCC as a unfused branch although it may be fused
547 // with the CMP.
548 return;
549 }
550
551 if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) &&
552 isFirstMacroFusibleInst(Inst, *MCII))) {
553 // If we meet a unfused branch or the first instuction in a fusiable pair,
554 // insert a BoundaryAlign fragment.
555 OS.insert(PendingBA = new MCBoundaryAlignFragment(AlignBoundary, STI));
556 }
557 }
558
559 /// Set the last fragment to be aligned for the BoundaryAlignFragment.
emitInstructionEnd(MCObjectStreamer & OS,const MCInst & Inst)560 void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) {
561 PrevInst = Inst;
562 MCFragment *CF = OS.getCurrentFragment();
563 PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF));
564 if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF))
565 F->setAllowAutoPadding(CanPadInst);
566
567 if (!canPadBranches(OS))
568 return;
569
570 if (!needAlign(Inst) || !PendingBA)
571 return;
572
573 // Tie the aligned instructions into a pending BoundaryAlign.
574 PendingBA->setLastFragment(CF);
575 PendingBA = nullptr;
576
577 // We need to ensure that further data isn't added to the current
578 // DataFragment, so that we can get the size of instructions later in
579 // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty
580 // DataFragment.
581 if (isa_and_nonnull<MCDataFragment>(CF))
582 OS.insert(new MCDataFragment());
583
584 // Update the maximum alignment on the current section if necessary.
585 MCSection *Sec = OS.getCurrentSectionOnly();
586 Sec->ensureMinAlignment(AlignBoundary);
587 }
588
/// Translate a relocation name into a fixup kind.
///
/// For ELF targets the name is looked up among the relocation names pulled in
/// from the x86_64 or i386 .def file (chosen by the triple's architecture)
/// plus a few BFD_RELOC_* aliases; a match is returned as a literal relocation
/// kind (FirstLiteralRelocationKind + relocation type) and a miss yields
/// std::nullopt. For every other object format the lookup is delegated to
/// MCAsmBackend::getFixupKind.
std::optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const {
  if (STI.getTargetTriple().isOSBinFormatELF()) {
    unsigned Type;
    if (STI.getTargetTriple().getArch() == Triple::x86_64) {
      // Each ELF_RELOC(X, Y) entry of the .def file expands to a .Case that
      // maps the relocation's spelled name to its numeric value.
      Type = llvm::StringSwitch<unsigned>(Name)
#define ELF_RELOC(X, Y) .Case(#X, Y)
#include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
#undef ELF_RELOC
                 .Case("BFD_RELOC_NONE", ELF::R_X86_64_NONE)
                 .Case("BFD_RELOC_8", ELF::R_X86_64_8)
                 .Case("BFD_RELOC_16", ELF::R_X86_64_16)
                 .Case("BFD_RELOC_32", ELF::R_X86_64_32)
                 .Case("BFD_RELOC_64", ELF::R_X86_64_64)
                 .Default(-1u);
    } else {
      Type = llvm::StringSwitch<unsigned>(Name)
#define ELF_RELOC(X, Y) .Case(#X, Y)
#include "llvm/BinaryFormat/ELFRelocs/i386.def"
#undef ELF_RELOC
                 .Case("BFD_RELOC_NONE", ELF::R_386_NONE)
                 .Case("BFD_RELOC_8", ELF::R_386_8)
                 .Case("BFD_RELOC_16", ELF::R_386_16)
                 .Case("BFD_RELOC_32", ELF::R_386_32)
                 .Default(-1u);
    }
    // -1u is the "no match" marker produced by the .Default above.
    if (Type == -1u)
      return std::nullopt;
    return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
  }
  return MCAsmBackend::getFixupKind(Name);
}
620
getFixupKindInfo(MCFixupKind Kind) const621 const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
622 const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
623 {"reloc_riprel_4byte", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
624 {"reloc_riprel_4byte_movq_load", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
625 {"reloc_riprel_4byte_relax", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
626 {"reloc_riprel_4byte_relax_rex", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
627 {"reloc_signed_4byte", 0, 32, 0},
628 {"reloc_signed_4byte_relax", 0, 32, 0},
629 {"reloc_global_offset_table", 0, 32, 0},
630 {"reloc_global_offset_table8", 0, 64, 0},
631 {"reloc_branch_4byte_pcrel", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
632 };
633
634 // Fixup kinds from .reloc directive are like R_386_NONE/R_X86_64_NONE. They
635 // do not require any extra processing.
636 if (Kind >= FirstLiteralRelocationKind)
637 return MCAsmBackend::getFixupKindInfo(FK_NONE);
638
639 if (Kind < FirstTargetFixupKind)
640 return MCAsmBackend::getFixupKindInfo(Kind);
641
642 assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
643 "Invalid kind!");
644 assert(Infos[Kind - FirstTargetFixupKind].Name && "Empty fixup name!");
645 return Infos[Kind - FirstTargetFixupKind];
646 }
647
shouldForceRelocation(const MCAssembler &,const MCFixup & Fixup,const MCValue &,const MCSubtargetInfo * STI)648 bool X86AsmBackend::shouldForceRelocation(const MCAssembler &,
649 const MCFixup &Fixup, const MCValue &,
650 const MCSubtargetInfo *STI) {
651 return Fixup.getKind() >= FirstLiteralRelocationKind;
652 }
653
getFixupKindSize(unsigned Kind)654 static unsigned getFixupKindSize(unsigned Kind) {
655 switch (Kind) {
656 default:
657 llvm_unreachable("invalid fixup kind!");
658 case FK_NONE:
659 return 0;
660 case FK_PCRel_1:
661 case FK_SecRel_1:
662 case FK_Data_1:
663 return 1;
664 case FK_PCRel_2:
665 case FK_SecRel_2:
666 case FK_Data_2:
667 return 2;
668 case FK_PCRel_4:
669 case X86::reloc_riprel_4byte:
670 case X86::reloc_riprel_4byte_relax:
671 case X86::reloc_riprel_4byte_relax_rex:
672 case X86::reloc_riprel_4byte_movq_load:
673 case X86::reloc_signed_4byte:
674 case X86::reloc_signed_4byte_relax:
675 case X86::reloc_global_offset_table:
676 case X86::reloc_branch_4byte_pcrel:
677 case FK_SecRel_4:
678 case FK_Data_4:
679 return 4;
680 case FK_PCRel_8:
681 case FK_SecRel_8:
682 case FK_Data_8:
683 case X86::reloc_global_offset_table8:
684 return 8;
685 }
686 }
687
applyFixup(const MCAssembler & Asm,const MCFixup & Fixup,const MCValue & Target,MutableArrayRef<char> Data,uint64_t Value,bool IsResolved,const MCSubtargetInfo * STI) const688 void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
689 const MCValue &Target,
690 MutableArrayRef<char> Data,
691 uint64_t Value, bool IsResolved,
692 const MCSubtargetInfo *STI) const {
693 unsigned Kind = Fixup.getKind();
694 if (Kind >= FirstLiteralRelocationKind)
695 return;
696 unsigned Size = getFixupKindSize(Kind);
697
698 assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");
699
700 int64_t SignedValue = static_cast<int64_t>(Value);
701 if ((Target.isAbsolute() || IsResolved) &&
702 getFixupKindInfo(Fixup.getKind()).Flags &
703 MCFixupKindInfo::FKF_IsPCRel) {
704 // check that PC relative fixup fits into the fixup size.
705 if (Size > 0 && !isIntN(Size * 8, SignedValue))
706 Asm.getContext().reportError(
707 Fixup.getLoc(), "value of " + Twine(SignedValue) +
708 " is too large for field of " + Twine(Size) +
709 ((Size == 1) ? " byte." : " bytes."));
710 } else {
711 // Check that uppper bits are either all zeros or all ones.
712 // Specifically ignore overflow/underflow as long as the leakage is
713 // limited to the lower bits. This is to remain compatible with
714 // other assemblers.
715 assert((Size == 0 || isIntN(Size * 8 + 1, SignedValue)) &&
716 "Value does not fit in the Fixup field");
717 }
718
719 for (unsigned i = 0; i != Size; ++i)
720 Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
721 }
722
mayNeedRelaxation(const MCInst & MI,const MCSubtargetInfo & STI) const723 bool X86AsmBackend::mayNeedRelaxation(const MCInst &MI,
724 const MCSubtargetInfo &STI) const {
725 unsigned Opcode = MI.getOpcode();
726 return isRelaxableBranch(Opcode) ||
727 (X86::getOpcodeForLongImmediateForm(Opcode) != Opcode &&
728 MI.getOperand(MI.getNumOperands() - 1).isExpr());
729 }
730
fixupNeedsRelaxation(const MCFixup & Fixup,uint64_t Value,const MCRelaxableFragment * DF,const MCAsmLayout & Layout) const731 bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
732 uint64_t Value,
733 const MCRelaxableFragment *DF,
734 const MCAsmLayout &Layout) const {
735 // Relax if the value is too big for a (signed) i8.
736 return !isInt<8>(Value);
737 }
738
739 // FIXME: Can tblgen help at all here to verify there aren't other instructions
740 // we can relax?
relaxInstruction(MCInst & Inst,const MCSubtargetInfo & STI) const741 void X86AsmBackend::relaxInstruction(MCInst &Inst,
742 const MCSubtargetInfo &STI) const {
743 // The only relaxations X86 does is from a 1byte pcrel to a 4byte pcrel.
744 bool Is16BitMode = STI.hasFeature(X86::Is16Bit);
745 unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode);
746
747 if (RelaxedOp == Inst.getOpcode()) {
748 SmallString<256> Tmp;
749 raw_svector_ostream OS(Tmp);
750 Inst.dump_pretty(OS);
751 OS << "\n";
752 report_fatal_error("unexpected instruction to relax: " + OS.str());
753 }
754
755 Inst.setOpcode(RelaxedOp);
756 }
757
padInstructionViaPrefix(MCRelaxableFragment & RF,MCCodeEmitter & Emitter,unsigned & RemainingSize) const758 bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
759 MCCodeEmitter &Emitter,
760 unsigned &RemainingSize) const {
761 if (!RF.getAllowAutoPadding())
762 return false;
763 // If the instruction isn't fully relaxed, shifting it around might require a
764 // larger value for one of the fixups then can be encoded. The outer loop
765 // will also catch this before moving to the next instruction, but we need to
766 // prevent padding this single instruction as well.
767 if (mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
768 return false;
769
770 const unsigned OldSize = RF.getContents().size();
771 if (OldSize == 15)
772 return false;
773
774 const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize);
775 const unsigned RemainingPrefixSize = [&]() -> unsigned {
776 SmallString<15> Code;
777 Emitter.emitPrefix(RF.getInst(), Code, STI);
778 assert(Code.size() < 15 && "The number of prefixes must be less than 15.");
779
780 // TODO: It turns out we need a decent amount of plumbing for the target
781 // specific bits to determine number of prefixes its safe to add. Various
782 // targets (older chips mostly, but also Atom family) encounter decoder
783 // stalls with too many prefixes. For testing purposes, we set the value
784 // externally for the moment.
785 unsigned ExistingPrefixSize = Code.size();
786 if (TargetPrefixMax <= ExistingPrefixSize)
787 return 0;
788 return TargetPrefixMax - ExistingPrefixSize;
789 }();
790 const unsigned PrefixBytesToAdd =
791 std::min(MaxPossiblePad, RemainingPrefixSize);
792 if (PrefixBytesToAdd == 0)
793 return false;
794
795 const uint8_t Prefix = determinePaddingPrefix(RF.getInst());
796
797 SmallString<256> Code;
798 Code.append(PrefixBytesToAdd, Prefix);
799 Code.append(RF.getContents().begin(), RF.getContents().end());
800 RF.getContents() = Code;
801
802 // Adjust the fixups for the change in offsets
803 for (auto &F : RF.getFixups()) {
804 F.setOffset(F.getOffset() + PrefixBytesToAdd);
805 }
806
807 RemainingSize -= PrefixBytesToAdd;
808 return true;
809 }
810
padInstructionViaRelaxation(MCRelaxableFragment & RF,MCCodeEmitter & Emitter,unsigned & RemainingSize) const811 bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
812 MCCodeEmitter &Emitter,
813 unsigned &RemainingSize) const {
814 if (!mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
815 // TODO: There are lots of other tricks we could apply for increasing
816 // encoding size without impacting performance.
817 return false;
818
819 MCInst Relaxed = RF.getInst();
820 relaxInstruction(Relaxed, *RF.getSubtargetInfo());
821
822 SmallVector<MCFixup, 4> Fixups;
823 SmallString<15> Code;
824 Emitter.encodeInstruction(Relaxed, Code, Fixups, *RF.getSubtargetInfo());
825 const unsigned OldSize = RF.getContents().size();
826 const unsigned NewSize = Code.size();
827 assert(NewSize >= OldSize && "size decrease during relaxation?");
828 unsigned Delta = NewSize - OldSize;
829 if (Delta > RemainingSize)
830 return false;
831 RF.setInst(Relaxed);
832 RF.getContents() = Code;
833 RF.getFixups() = Fixups;
834 RemainingSize -= Delta;
835 return true;
836 }
837
padInstructionEncoding(MCRelaxableFragment & RF,MCCodeEmitter & Emitter,unsigned & RemainingSize) const838 bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
839 MCCodeEmitter &Emitter,
840 unsigned &RemainingSize) const {
841 bool Changed = false;
842 if (RemainingSize != 0)
843 Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize);
844 if (RemainingSize != 0)
845 Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize);
846 return Changed;
847 }
848
finishLayout(MCAssembler const & Asm,MCAsmLayout & Layout) const849 void X86AsmBackend::finishLayout(MCAssembler const &Asm,
850 MCAsmLayout &Layout) const {
851 // See if we can further relax some instructions to cut down on the number of
852 // nop bytes required for code alignment. The actual win is in reducing
853 // instruction count, not number of bytes. Modern X86-64 can easily end up
854 // decode limited. It is often better to reduce the number of instructions
855 // (i.e. eliminate nops) even at the cost of increasing the size and
856 // complexity of others.
857 if (!X86PadForAlign && !X86PadForBranchAlign)
858 return;
859
860 // The processed regions are delimitered by LabeledFragments. -g may have more
861 // MCSymbols and therefore different relaxation results. X86PadForAlign is
862 // disabled by default to eliminate the -g vs non -g difference.
863 DenseSet<MCFragment *> LabeledFragments;
864 for (const MCSymbol &S : Asm.symbols())
865 LabeledFragments.insert(S.getFragment(false));
866
867 for (MCSection &Sec : Asm) {
868 if (!Sec.getKind().isText())
869 continue;
870
871 SmallVector<MCRelaxableFragment *, 4> Relaxable;
872 for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) {
873 MCFragment &F = *I;
874
875 if (LabeledFragments.count(&F))
876 Relaxable.clear();
877
878 if (F.getKind() == MCFragment::FT_Data ||
879 F.getKind() == MCFragment::FT_CompactEncodedInst)
880 // Skip and ignore
881 continue;
882
883 if (F.getKind() == MCFragment::FT_Relaxable) {
884 auto &RF = cast<MCRelaxableFragment>(*I);
885 Relaxable.push_back(&RF);
886 continue;
887 }
888
889 auto canHandle = [](MCFragment &F) -> bool {
890 switch (F.getKind()) {
891 default:
892 return false;
893 case MCFragment::FT_Align:
894 return X86PadForAlign;
895 case MCFragment::FT_BoundaryAlign:
896 return X86PadForBranchAlign;
897 }
898 };
899 // For any unhandled kind, assume we can't change layout.
900 if (!canHandle(F)) {
901 Relaxable.clear();
902 continue;
903 }
904
905 #ifndef NDEBUG
906 const uint64_t OrigOffset = Layout.getFragmentOffset(&F);
907 #endif
908 const uint64_t OrigSize = Asm.computeFragmentSize(Layout, F);
909
910 // To keep the effects local, prefer to relax instructions closest to
911 // the align directive. This is purely about human understandability
912 // of the resulting code. If we later find a reason to expand
913 // particular instructions over others, we can adjust.
914 MCFragment *FirstChangedFragment = nullptr;
915 unsigned RemainingSize = OrigSize;
916 while (!Relaxable.empty() && RemainingSize != 0) {
917 auto &RF = *Relaxable.pop_back_val();
918 // Give the backend a chance to play any tricks it wishes to increase
919 // the encoding size of the given instruction. Target independent code
920 // will try further relaxation, but target's may play further tricks.
921 if (padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize))
922 FirstChangedFragment = &RF;
923
924 // If we have an instruction which hasn't been fully relaxed, we can't
925 // skip past it and insert bytes before it. Changing its starting
926 // offset might require a larger negative offset than it can encode.
927 // We don't need to worry about larger positive offsets as none of the
928 // possible offsets between this and our align are visible, and the
929 // ones afterwards aren't changing.
930 if (mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
931 break;
932 }
933 Relaxable.clear();
934
935 if (FirstChangedFragment) {
936 // Make sure the offsets for any fragments in the effected range get
937 // updated. Note that this (conservatively) invalidates the offsets of
938 // those following, but this is not required.
939 Layout.invalidateFragmentsFrom(FirstChangedFragment);
940 }
941
942 // BoundaryAlign explicitly tracks it's size (unlike align)
943 if (F.getKind() == MCFragment::FT_BoundaryAlign)
944 cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize);
945
946 #ifndef NDEBUG
947 const uint64_t FinalOffset = Layout.getFragmentOffset(&F);
948 const uint64_t FinalSize = Asm.computeFragmentSize(Layout, F);
949 assert(OrigOffset + OrigSize == FinalOffset + FinalSize &&
950 "can't move start of next fragment!");
951 assert(FinalSize == RemainingSize && "inconsistent size computation?");
952 #endif
953
954 // If we're looking at a boundary align, make sure we don't try to pad
955 // its target instructions for some following directive. Doing so would
956 // break the alignment of the current boundary align.
957 if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(&F)) {
958 const MCFragment *LastFragment = BF->getLastFragment();
959 if (!LastFragment)
960 continue;
961 while (&*I != LastFragment)
962 ++I;
963 }
964 }
965 }
966
967 // The layout is done. Mark every fragment as valid.
968 for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
969 MCSection &Section = *Layout.getSectionOrder()[i];
970 Layout.getFragmentOffset(&*Section.getFragmentList().rbegin());
971 Asm.computeFragmentSize(Layout, *Section.getFragmentList().rbegin());
972 }
973 }
974
getMaximumNopSize(const MCSubtargetInfo & STI) const975 unsigned X86AsmBackend::getMaximumNopSize(const MCSubtargetInfo &STI) const {
976 if (STI.hasFeature(X86::Is16Bit))
977 return 4;
978 if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Is64Bit))
979 return 1;
980 if (STI.hasFeature(X86::TuningFast7ByteNOP))
981 return 7;
982 if (STI.hasFeature(X86::TuningFast15ByteNOP))
983 return 15;
984 if (STI.hasFeature(X86::TuningFast11ByteNOP))
985 return 11;
986 // FIXME: handle 32-bit mode
987 // 15-bytes is the longest single NOP instruction, but 10-bytes is
988 // commonly the longest that can be efficiently decoded.
989 return 10;
990 }
991
992 /// Write a sequence of optimal nops to the output, covering \p Count
993 /// bytes.
994 /// \return - true on success, false on failure
writeNopData(raw_ostream & OS,uint64_t Count,const MCSubtargetInfo * STI) const995 bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
996 const MCSubtargetInfo *STI) const {
997 static const char Nops32Bit[10][11] = {
998 // nop
999 "\x90",
1000 // xchg %ax,%ax
1001 "\x66\x90",
1002 // nopl (%[re]ax)
1003 "\x0f\x1f\x00",
1004 // nopl 0(%[re]ax)
1005 "\x0f\x1f\x40\x00",
1006 // nopl 0(%[re]ax,%[re]ax,1)
1007 "\x0f\x1f\x44\x00\x00",
1008 // nopw 0(%[re]ax,%[re]ax,1)
1009 "\x66\x0f\x1f\x44\x00\x00",
1010 // nopl 0L(%[re]ax)
1011 "\x0f\x1f\x80\x00\x00\x00\x00",
1012 // nopl 0L(%[re]ax,%[re]ax,1)
1013 "\x0f\x1f\x84\x00\x00\x00\x00\x00",
1014 // nopw 0L(%[re]ax,%[re]ax,1)
1015 "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00",
1016 // nopw %cs:0L(%[re]ax,%[re]ax,1)
1017 "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00",
1018 };
1019
1020 // 16-bit mode uses different nop patterns than 32-bit.
1021 static const char Nops16Bit[4][11] = {
1022 // nop
1023 "\x90",
1024 // xchg %eax,%eax
1025 "\x66\x90",
1026 // lea 0(%si),%si
1027 "\x8d\x74\x00",
1028 // lea 0w(%si),%si
1029 "\x8d\xb4\x00\x00",
1030 };
1031
1032 const char(*Nops)[11] =
1033 STI->hasFeature(X86::Is16Bit) ? Nops16Bit : Nops32Bit;
1034
1035 uint64_t MaxNopLength = (uint64_t)getMaximumNopSize(*STI);
1036
1037 // Emit as many MaxNopLength NOPs as needed, then emit a NOP of the remaining
1038 // length.
1039 do {
1040 const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength);
1041 const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
1042 for (uint8_t i = 0; i < Prefixes; i++)
1043 OS << '\x66';
1044 const uint8_t Rest = ThisNopLength - Prefixes;
1045 if (Rest != 0)
1046 OS.write(Nops[Rest - 1], Rest);
1047 Count -= ThisNopLength;
1048 } while (Count != 0);
1049
1050 return true;
1051 }
1052
1053 /* *** */
1054
1055 namespace {
1056
1057 class ELFX86AsmBackend : public X86AsmBackend {
1058 public:
1059 uint8_t OSABI;
ELFX86AsmBackend(const Target & T,uint8_t OSABI,const MCSubtargetInfo & STI)1060 ELFX86AsmBackend(const Target &T, uint8_t OSABI, const MCSubtargetInfo &STI)
1061 : X86AsmBackend(T, STI), OSABI(OSABI) {}
1062 };
1063
1064 class ELFX86_32AsmBackend : public ELFX86AsmBackend {
1065 public:
ELFX86_32AsmBackend(const Target & T,uint8_t OSABI,const MCSubtargetInfo & STI)1066 ELFX86_32AsmBackend(const Target &T, uint8_t OSABI,
1067 const MCSubtargetInfo &STI)
1068 : ELFX86AsmBackend(T, OSABI, STI) {}
1069
1070 std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const1071 createObjectTargetWriter() const override {
1072 return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, ELF::EM_386);
1073 }
1074 };
1075
1076 class ELFX86_X32AsmBackend : public ELFX86AsmBackend {
1077 public:
ELFX86_X32AsmBackend(const Target & T,uint8_t OSABI,const MCSubtargetInfo & STI)1078 ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI,
1079 const MCSubtargetInfo &STI)
1080 : ELFX86AsmBackend(T, OSABI, STI) {}
1081
1082 std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const1083 createObjectTargetWriter() const override {
1084 return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
1085 ELF::EM_X86_64);
1086 }
1087 };
1088
1089 class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend {
1090 public:
ELFX86_IAMCUAsmBackend(const Target & T,uint8_t OSABI,const MCSubtargetInfo & STI)1091 ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI,
1092 const MCSubtargetInfo &STI)
1093 : ELFX86AsmBackend(T, OSABI, STI) {}
1094
1095 std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const1096 createObjectTargetWriter() const override {
1097 return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
1098 ELF::EM_IAMCU);
1099 }
1100 };
1101
1102 class ELFX86_64AsmBackend : public ELFX86AsmBackend {
1103 public:
ELFX86_64AsmBackend(const Target & T,uint8_t OSABI,const MCSubtargetInfo & STI)1104 ELFX86_64AsmBackend(const Target &T, uint8_t OSABI,
1105 const MCSubtargetInfo &STI)
1106 : ELFX86AsmBackend(T, OSABI, STI) {}
1107
1108 std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const1109 createObjectTargetWriter() const override {
1110 return createX86ELFObjectWriter(/*IsELF64*/ true, OSABI, ELF::EM_X86_64);
1111 }
1112 };
1113
1114 class WindowsX86AsmBackend : public X86AsmBackend {
1115 bool Is64Bit;
1116
1117 public:
WindowsX86AsmBackend(const Target & T,bool is64Bit,const MCSubtargetInfo & STI)1118 WindowsX86AsmBackend(const Target &T, bool is64Bit,
1119 const MCSubtargetInfo &STI)
1120 : X86AsmBackend(T, STI)
1121 , Is64Bit(is64Bit) {
1122 }
1123
getFixupKind(StringRef Name) const1124 std::optional<MCFixupKind> getFixupKind(StringRef Name) const override {
1125 return StringSwitch<std::optional<MCFixupKind>>(Name)
1126 .Case("dir32", FK_Data_4)
1127 .Case("secrel32", FK_SecRel_4)
1128 .Case("secidx", FK_SecRel_2)
1129 .Default(MCAsmBackend::getFixupKind(Name));
1130 }
1131
1132 std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const1133 createObjectTargetWriter() const override {
1134 return createX86WinCOFFObjectWriter(Is64Bit);
1135 }
1136 };
1137
namespace CU {

/// Compact unwind encoding values.
enum CompactUnwindEncodings {
  /// Frame based on [RE]BP: [RE]BP is pushed on the stack immediately after
  /// the return address, then [RE]SP is moved to [RE]BP.
  UNWIND_MODE_BP_FRAME = 0x01000000,

  /// Frameless function whose constant stack size is small.
  UNWIND_MODE_STACK_IMMD = 0x02000000,

  /// Frameless function whose constant stack size is large.
  UNWIND_MODE_STACK_IND = 0x03000000,

  /// No compact unwind encoding is available.
  UNWIND_MODE_DWARF = 0x04000000,

  /// Bit mask of the saved-register field in frame mode.
  UNWIND_BP_FRAME_REGISTERS = 0x00007FFF,

  /// Bit mask of the register-permutation field in frameless mode.
  UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
};

} // namespace CU
1163
1164 class DarwinX86AsmBackend : public X86AsmBackend {
1165 const MCRegisterInfo &MRI;
1166
1167 /// Number of registers that can be saved in a compact unwind encoding.
1168 enum { CU_NUM_SAVED_REGS = 6 };
1169
1170 mutable unsigned SavedRegs[CU_NUM_SAVED_REGS];
1171 Triple TT;
1172 bool Is64Bit;
1173
1174 unsigned OffsetSize; ///< Offset of a "push" instruction.
1175 unsigned MoveInstrSize; ///< Size of a "move" instruction.
1176 unsigned StackDivide; ///< Amount to adjust stack size by.
1177 protected:
1178 /// Size of a "push" instruction for the given register.
PushInstrSize(unsigned Reg) const1179 unsigned PushInstrSize(unsigned Reg) const {
1180 switch (Reg) {
1181 case X86::EBX:
1182 case X86::ECX:
1183 case X86::EDX:
1184 case X86::EDI:
1185 case X86::ESI:
1186 case X86::EBP:
1187 case X86::RBX:
1188 case X86::RBP:
1189 return 1;
1190 case X86::R12:
1191 case X86::R13:
1192 case X86::R14:
1193 case X86::R15:
1194 return 2;
1195 }
1196 return 1;
1197 }
1198
1199 private:
1200 /// Get the compact unwind number for a given register. The number
1201 /// corresponds to the enum lists in compact_unwind_encoding.h.
getCompactUnwindRegNum(unsigned Reg) const1202 int getCompactUnwindRegNum(unsigned Reg) const {
1203 static const MCPhysReg CU32BitRegs[7] = {
1204 X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
1205 };
1206 static const MCPhysReg CU64BitRegs[] = {
1207 X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
1208 };
1209 const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
1210 for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
1211 if (*CURegs == Reg)
1212 return Idx;
1213
1214 return -1;
1215 }
1216
1217 /// Return the registers encoded for a compact encoding with a frame
1218 /// pointer.
encodeCompactUnwindRegistersWithFrame() const1219 uint32_t encodeCompactUnwindRegistersWithFrame() const {
1220 // Encode the registers in the order they were saved --- 3-bits per
1221 // register. The list of saved registers is assumed to be in reverse
1222 // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS.
1223 uint32_t RegEnc = 0;
1224 for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) {
1225 unsigned Reg = SavedRegs[i];
1226 if (Reg == 0) break;
1227
1228 int CURegNum = getCompactUnwindRegNum(Reg);
1229 if (CURegNum == -1) return ~0U;
1230
1231 // Encode the 3-bit register number in order, skipping over 3-bits for
1232 // each register.
1233 RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
1234 }
1235
1236 assert((RegEnc & 0x3FFFF) == RegEnc &&
1237 "Invalid compact register encoding!");
1238 return RegEnc;
1239 }
1240
1241 /// Create the permutation encoding used with frameless stacks. It is
1242 /// passed the number of registers to be saved and an array of the registers
1243 /// saved.
encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const1244 uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const {
1245 // The saved registers are numbered from 1 to 6. In order to encode the
1246 // order in which they were saved, we re-number them according to their
1247 // place in the register order. The re-numbering is relative to the last
1248 // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in
1249 // that order:
1250 //
1251 // Orig Re-Num
1252 // ---- ------
1253 // 6 6
1254 // 2 2
1255 // 4 3
1256 // 5 3
1257 //
1258 for (unsigned i = 0; i < RegCount; ++i) {
1259 int CUReg = getCompactUnwindRegNum(SavedRegs[i]);
1260 if (CUReg == -1) return ~0U;
1261 SavedRegs[i] = CUReg;
1262 }
1263
1264 // Reverse the list.
1265 std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]);
1266
1267 uint32_t RenumRegs[CU_NUM_SAVED_REGS];
1268 for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i){
1269 unsigned Countless = 0;
1270 for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
1271 if (SavedRegs[j] < SavedRegs[i])
1272 ++Countless;
1273
1274 RenumRegs[i] = SavedRegs[i] - Countless - 1;
1275 }
1276
1277 // Take the renumbered values and encode them into a 10-bit number.
1278 uint32_t permutationEncoding = 0;
1279 switch (RegCount) {
1280 case 6:
1281 permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
1282 + 6 * RenumRegs[2] + 2 * RenumRegs[3]
1283 + RenumRegs[4];
1284 break;
1285 case 5:
1286 permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
1287 + 6 * RenumRegs[3] + 2 * RenumRegs[4]
1288 + RenumRegs[5];
1289 break;
1290 case 4:
1291 permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3]
1292 + 3 * RenumRegs[4] + RenumRegs[5];
1293 break;
1294 case 3:
1295 permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4]
1296 + RenumRegs[5];
1297 break;
1298 case 2:
1299 permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5];
1300 break;
1301 case 1:
1302 permutationEncoding |= RenumRegs[5];
1303 break;
1304 }
1305
1306 assert((permutationEncoding & 0x3FF) == permutationEncoding &&
1307 "Invalid compact register encoding!");
1308 return permutationEncoding;
1309 }
1310
1311 public:
DarwinX86AsmBackend(const Target & T,const MCRegisterInfo & MRI,const MCSubtargetInfo & STI)1312 DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI,
1313 const MCSubtargetInfo &STI)
1314 : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()),
1315 Is64Bit(TT.isArch64Bit()) {
1316 memset(SavedRegs, 0, sizeof(SavedRegs));
1317 OffsetSize = Is64Bit ? 8 : 4;
1318 MoveInstrSize = Is64Bit ? 3 : 2;
1319 StackDivide = Is64Bit ? 8 : 4;
1320 }
1321
1322 std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const1323 createObjectTargetWriter() const override {
1324 uint32_t CPUType = cantFail(MachO::getCPUType(TT));
1325 uint32_t CPUSubType = cantFail(MachO::getCPUSubType(TT));
1326 return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubType);
1327 }
1328
1329 /// Implementation of algorithm to generate the compact unwind encoding
1330 /// for the CFI instructions.
generateCompactUnwindEncoding(const MCDwarfFrameInfo * FI,const MCContext * Ctxt) const1331 uint32_t generateCompactUnwindEncoding(const MCDwarfFrameInfo *FI,
1332 const MCContext *Ctxt) const override {
1333 ArrayRef<MCCFIInstruction> Instrs = FI->Instructions;
1334 if (Instrs.empty()) return 0;
1335 if (!isDarwinCanonicalPersonality(FI->Personality) &&
1336 !Ctxt->emitCompactUnwindNonCanonical())
1337 return CU::UNWIND_MODE_DWARF;
1338
1339 // Reset the saved registers.
1340 unsigned SavedRegIdx = 0;
1341 memset(SavedRegs, 0, sizeof(SavedRegs));
1342
1343 bool HasFP = false;
1344
1345 // Encode that we are using EBP/RBP as the frame pointer.
1346 uint32_t CompactUnwindEncoding = 0;
1347
1348 unsigned SubtractInstrIdx = Is64Bit ? 3 : 2;
1349 unsigned InstrOffset = 0;
1350 unsigned StackAdjust = 0;
1351 unsigned StackSize = 0;
1352 int MinAbsOffset = std::numeric_limits<int>::max();
1353
1354 for (const MCCFIInstruction &Inst : Instrs) {
1355 switch (Inst.getOperation()) {
1356 default:
1357 // Any other CFI directives indicate a frame that we aren't prepared
1358 // to represent via compact unwind, so just bail out.
1359 return CU::UNWIND_MODE_DWARF;
1360 case MCCFIInstruction::OpDefCfaRegister: {
1361 // Defines a frame pointer. E.g.
1362 //
1363 // movq %rsp, %rbp
1364 // L0:
1365 // .cfi_def_cfa_register %rbp
1366 //
1367 HasFP = true;
1368
1369 // If the frame pointer is other than esp/rsp, we do not have a way to
1370 // generate a compact unwinding representation, so bail out.
1371 if (*MRI.getLLVMRegNum(Inst.getRegister(), true) !=
1372 (Is64Bit ? X86::RBP : X86::EBP))
1373 return CU::UNWIND_MODE_DWARF;
1374
1375 // Reset the counts.
1376 memset(SavedRegs, 0, sizeof(SavedRegs));
1377 StackAdjust = 0;
1378 SavedRegIdx = 0;
1379 MinAbsOffset = std::numeric_limits<int>::max();
1380 InstrOffset += MoveInstrSize;
1381 break;
1382 }
1383 case MCCFIInstruction::OpDefCfaOffset: {
1384 // Defines a new offset for the CFA. E.g.
1385 //
1386 // With frame:
1387 //
1388 // pushq %rbp
1389 // L0:
1390 // .cfi_def_cfa_offset 16
1391 //
1392 // Without frame:
1393 //
1394 // subq $72, %rsp
1395 // L0:
1396 // .cfi_def_cfa_offset 80
1397 //
1398 StackSize = Inst.getOffset() / StackDivide;
1399 break;
1400 }
1401 case MCCFIInstruction::OpOffset: {
1402 // Defines a "push" of a callee-saved register. E.g.
1403 //
1404 // pushq %r15
1405 // pushq %r14
1406 // pushq %rbx
1407 // L0:
1408 // subq $120, %rsp
1409 // L1:
1410 // .cfi_offset %rbx, -40
1411 // .cfi_offset %r14, -32
1412 // .cfi_offset %r15, -24
1413 //
1414 if (SavedRegIdx == CU_NUM_SAVED_REGS)
1415 // If there are too many saved registers, we cannot use a compact
1416 // unwind encoding.
1417 return CU::UNWIND_MODE_DWARF;
1418
1419 unsigned Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true);
1420 SavedRegs[SavedRegIdx++] = Reg;
1421 StackAdjust += OffsetSize;
1422 MinAbsOffset = std::min(MinAbsOffset, abs(Inst.getOffset()));
1423 InstrOffset += PushInstrSize(Reg);
1424 break;
1425 }
1426 }
1427 }
1428
1429 StackAdjust /= StackDivide;
1430
1431 if (HasFP) {
1432 if ((StackAdjust & 0xFF) != StackAdjust)
1433 // Offset was too big for a compact unwind encoding.
1434 return CU::UNWIND_MODE_DWARF;
1435
1436 // We don't attempt to track a real StackAdjust, so if the saved registers
1437 // aren't adjacent to rbp we can't cope.
1438 if (SavedRegIdx != 0 && MinAbsOffset != 3 * (int)OffsetSize)
1439 return CU::UNWIND_MODE_DWARF;
1440
1441 // Get the encoding of the saved registers when we have a frame pointer.
1442 uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame();
1443 if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
1444
1445 CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME;
1446 CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
1447 CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
1448 } else {
1449 SubtractInstrIdx += InstrOffset;
1450 ++StackAdjust;
1451
1452 if ((StackSize & 0xFF) == StackSize) {
1453 // Frameless stack with a small stack size.
1454 CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD;
1455
1456 // Encode the stack size.
1457 CompactUnwindEncoding |= (StackSize & 0xFF) << 16;
1458 } else {
1459 if ((StackAdjust & 0x7) != StackAdjust)
1460 // The extra stack adjustments are too big for us to handle.
1461 return CU::UNWIND_MODE_DWARF;
1462
1463 // Frameless stack with an offset too large for us to encode compactly.
1464 CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND;
1465
1466 // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
1467 // instruction.
1468 CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;
1469
1470 // Encode any extra stack adjustments (done via push instructions).
1471 CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
1472 }
1473
1474 // Encode the number of registers saved. (Reverse the list first.)
1475 std::reverse(&SavedRegs[0], &SavedRegs[SavedRegIdx]);
1476 CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;
1477
1478 // Get the encoding of the saved registers when we don't have a frame
1479 // pointer.
1480 uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegIdx);
1481 if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
1482
1483 // Encode the register encoding.
1484 CompactUnwindEncoding |=
1485 RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION;
1486 }
1487
1488 return CompactUnwindEncoding;
1489 }
1490 };
1491
1492 } // end anonymous namespace
1493
createX86_32AsmBackend(const Target & T,const MCSubtargetInfo & STI,const MCRegisterInfo & MRI,const MCTargetOptions & Options)1494 MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
1495 const MCSubtargetInfo &STI,
1496 const MCRegisterInfo &MRI,
1497 const MCTargetOptions &Options) {
1498 const Triple &TheTriple = STI.getTargetTriple();
1499 if (TheTriple.isOSBinFormatMachO())
1500 return new DarwinX86AsmBackend(T, MRI, STI);
1501
1502 if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
1503 return new WindowsX86AsmBackend(T, false, STI);
1504
1505 uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
1506
1507 if (TheTriple.isOSIAMCU())
1508 return new ELFX86_IAMCUAsmBackend(T, OSABI, STI);
1509
1510 return new ELFX86_32AsmBackend(T, OSABI, STI);
1511 }
1512
createX86_64AsmBackend(const Target & T,const MCSubtargetInfo & STI,const MCRegisterInfo & MRI,const MCTargetOptions & Options)1513 MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
1514 const MCSubtargetInfo &STI,
1515 const MCRegisterInfo &MRI,
1516 const MCTargetOptions &Options) {
1517 const Triple &TheTriple = STI.getTargetTriple();
1518 if (TheTriple.isOSBinFormatMachO())
1519 return new DarwinX86AsmBackend(T, MRI, STI);
1520
1521 if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
1522 return new WindowsX86AsmBackend(T, true, STI);
1523
1524 if (TheTriple.isUEFI()) {
1525 assert(TheTriple.isOSBinFormatCOFF() &&
1526 "Only COFF format is supported in UEFI environment.");
1527 return new WindowsX86AsmBackend(T, true, STI);
1528 }
1529
1530 uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
1531
1532 if (TheTriple.isX32())
1533 return new ELFX86_X32AsmBackend(T, OSABI, STI);
1534 return new ELFX86_64AsmBackend(T, OSABI, STI);
1535 }
1536