//===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86FixupKinds.h"
#include "MCTargetDesc/X86InstrRelaxTables.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

namespace {
/// A wrapper for holding a mask of the values from X86::AlignBranchBoundaryKind
class X86AlignBranchKind {
private:
  uint8_t AlignBranchKind = 0;

public:
  void operator=(const std::string &Val) {
    if (Val.empty())
      return;
    SmallVector<StringRef, 6> BranchTypes;
    StringRef(Val).split(BranchTypes, '+', -1, false);
    for (auto BranchType : BranchTypes) {
      if (BranchType == "fused")
        addKind(X86::AlignBranchFused);
      else if (BranchType == "jcc")
        addKind(X86::AlignBranchJcc);
      else if (BranchType == "jmp")
        addKind(X86::AlignBranchJmp);
      else if (BranchType == "call")
        addKind(X86::AlignBranchCall);
      else if (BranchType == "ret")
        addKind(X86::AlignBranchRet);
      else if (BranchType == "indirect")
        addKind(X86::AlignBranchIndirect);
      else {
        errs() << "invalid argument " << BranchType.str()
               << " to -x86-align-branch=; each element must be one of: fused, "
                  "jcc, jmp, call, ret, indirect (plus-separated)\n";
      }
    }
  }

  operator uint8_t() const { return AlignBranchKind; }
  void addKind(X86::AlignBranchBoundaryKind Value) { AlignBranchKind |= Value; }
};

X86AlignBranchKind X86AlignBranchKindLoc;

cl::opt<unsigned> X86AlignBranchBoundary(
    "x86-align-branch-boundary", cl::init(0),
    cl::desc(
        "Control how the assembler should align branches with NOP. If the "
        "boundary's size is not 0, it should be a power of 2 and no less "
        "than 32. Branches will be aligned so that they do not cross or end "
        "against a boundary of the specified size. "
        "The default value 0 does not "
        "align branches."));

cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch(
    "x86-align-branch",
    cl::desc(
        "Specify types of branches to align (plus separated list of types):"
        "\njcc indicates conditional jumps"
        "\nfused indicates fused conditional jumps"
        "\njmp indicates direct unconditional jumps"
        "\ncall indicates direct and indirect calls"
        "\nret indicates rets"
        "\nindirect indicates indirect unconditional jumps"),
    cl::location(X86AlignBranchKindLoc));

cl::opt<bool> X86AlignBranchWithin32BBoundaries(
    "x86-branches-within-32B-boundaries", cl::init(false),
    cl::desc(
        "Align selected instructions to mitigate negative performance impact "
        "of Intel's micro code update for errata skx102. May break "
        "assumptions about labels corresponding to particular instructions, "
        "and should be used with caution."));

cl::opt<unsigned> X86PadMaxPrefixSize(
    "x86-pad-max-prefix-size", cl::init(0),
    cl::desc("Maximum number of prefixes to use for padding"));

cl::opt<bool> X86PadForAlign(
    "x86-pad-for-align", cl::init(false), cl::Hidden,
    cl::desc("Pad previous instructions to implement align directives"));

cl::opt<bool> X86PadForBranchAlign(
    "x86-pad-for-branch-align", cl::init(true), cl::Hidden,
    cl::desc("Pad previous instructions to implement branch alignment"));

class X86AsmBackend : public MCAsmBackend {
  const MCSubtargetInfo &STI;
  std::unique_ptr<const MCInstrInfo> MCII;
  X86AlignBranchKind AlignBranchType;
  Align AlignBoundary;
  unsigned TargetPrefixMax = 0;

  MCInst PrevInst;
  MCBoundaryAlignFragment *PendingBA = nullptr;
  std::pair<MCFragment *, size_t> PrevInstPosition;
  bool CanPadInst;

  uint8_t determinePaddingPrefix(const MCInst &Inst) const;
  bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
  bool needAlign(const MCInst &Inst) const;
  bool canPadBranches(MCObjectStreamer &OS) const;
  bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const;

public:
  X86AsmBackend(const Target &T, const MCSubtargetInfo &STI)
      : MCAsmBackend(support::little), STI(STI),
        MCII(T.createMCInstrInfo()) {
    if (X86AlignBranchWithin32BBoundaries) {
      // At the moment, this defaults to aligning fused branches, unconditional
      // jumps, and (unfused) conditional jumps with nops. Both the
      // instructions aligned and the alignment method (nop vs prefix) may
      // change in the future.
      AlignBoundary = assumeAligned(32);
      AlignBranchType.addKind(X86::AlignBranchFused);
      AlignBranchType.addKind(X86::AlignBranchJcc);
      AlignBranchType.addKind(X86::AlignBranchJmp);
    }
    // Allow overriding defaults set by main flag
    if (X86AlignBranchBoundary.getNumOccurrences())
      AlignBoundary = assumeAligned(X86AlignBranchBoundary);
    if (X86AlignBranch.getNumOccurrences())
      AlignBranchType = X86AlignBranchKindLoc;
    if (X86PadMaxPrefixSize.getNumOccurrences())
      TargetPrefixMax = X86PadMaxPrefixSize;
  }

  bool allowAutoPadding() const override;
  bool allowEnhancedRelaxation() const override;
  void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst,
                            const MCSubtargetInfo &STI) override;
  void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) override;

  unsigned getNumFixupKinds() const override {
    return X86::NumTargetFixupKinds;
  }

  std::optional<MCFixupKind> getFixupKind(StringRef Name) const override;

  const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;

  bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
                             const MCValue &Target) override;

  void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
                  const MCValue &Target, MutableArrayRef<char> Data,
                  uint64_t Value, bool IsResolved,
                  const MCSubtargetInfo *STI) const override;

  bool mayNeedRelaxation(const MCInst &Inst,
                         const MCSubtargetInfo &STI) const override;

  bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
                            const MCRelaxableFragment *DF,
                            const MCAsmLayout &Layout) const override;

  void relaxInstruction(MCInst &Inst,
                        const MCSubtargetInfo &STI) const override;

  bool padInstructionViaRelaxation(MCRelaxableFragment &RF,
                                   MCCodeEmitter &Emitter,
                                   unsigned &RemainingSize) const;

  bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
                               unsigned &RemainingSize) const;

  bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
                              unsigned &RemainingSize) const;

  void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const override;

  unsigned getMaximumNopSize(const MCSubtargetInfo &STI) const override;

  bool writeNopData(raw_ostream &OS, uint64_t Count,
                    const MCSubtargetInfo *STI) const override;
};
} // end anonymous namespace

static unsigned getRelaxedOpcodeBranch(const MCInst &Inst, bool Is16BitMode) {
  unsigned Op = Inst.getOpcode();
  switch (Op) {
  default:
    return Op;
  case X86::JCC_1:
    return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4;
  case X86::JMP_1:
    return (Is16BitMode) ?
        X86::JMP_2 : X86::JMP_4;
  }
}

static unsigned getRelaxedOpcodeArith(const MCInst &Inst) {
  unsigned Op = Inst.getOpcode();
  return X86::getRelaxedOpcodeArith(Op);
}

static unsigned getRelaxedOpcode(const MCInst &Inst, bool Is16BitMode) {
  unsigned R = getRelaxedOpcodeArith(Inst);
  if (R != Inst.getOpcode())
    return R;
  return getRelaxedOpcodeBranch(Inst, Is16BitMode);
}

static X86::CondCode getCondFromBranch(const MCInst &MI,
                                       const MCInstrInfo &MCII) {
  unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  default:
    return X86::COND_INVALID;
  case X86::JCC_1: {
    const MCInstrDesc &Desc = MCII.get(Opcode);
    return static_cast<X86::CondCode>(
        MI.getOperand(Desc.getNumOperands() - 1).getImm());
  }
  }
}

static X86::SecondMacroFusionInstKind
classifySecondInstInMacroFusion(const MCInst &MI, const MCInstrInfo &MCII) {
  X86::CondCode CC = getCondFromBranch(MI, MCII);
  return classifySecondCondCodeInMacroFusion(CC);
}

/// Check if the instruction uses RIP relative addressing.
static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) {
  unsigned Opcode = MI.getOpcode();
  const MCInstrDesc &Desc = MCII.get(Opcode);
  uint64_t TSFlags = Desc.TSFlags;
  unsigned CurOp = X86II::getOperandBias(Desc);
  int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
  if (MemoryOperand < 0)
    return false;
  unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg;
  unsigned BaseReg = MI.getOperand(BaseRegNum).getReg();
  return (BaseReg == X86::RIP);
}

/// Check if the instruction is a prefix.
static bool isPrefix(const MCInst &MI, const MCInstrInfo &MCII) {
  return X86II::isPrefix(MCII.get(MI.getOpcode()).TSFlags);
}

/// Check if the instruction is valid as the first instruction in macro fusion.
static bool isFirstMacroFusibleInst(const MCInst &Inst,
                                    const MCInstrInfo &MCII) {
  // An Intel instruction with RIP relative addressing is not macro fusible.
  if (isRIPRelative(Inst, MCII))
    return false;
  X86::FirstMacroFusionInstKind FIK =
      X86::classifyFirstOpcodeInMacroFusion(Inst.getOpcode());
  return FIK != X86::FirstMacroFusionInstKind::Invalid;
}

/// X86 can reduce the bytes of NOP by padding instructions with prefixes to
/// get better performance in some cases. Here, we determine which prefix is
/// the most suitable.
///
/// If the instruction has a segment override prefix, use the existing one.
/// If the target is 64-bit, use the CS.
/// If the target is 32-bit,
///   - If the instruction has an ESP/EBP base register, use SS.
///   - Otherwise use DS.
uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const {
  assert((STI.hasFeature(X86::Is32Bit) || STI.hasFeature(X86::Is64Bit)) &&
         "Prefixes can be added only in 32-bit or 64-bit mode.");
  const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
  uint64_t TSFlags = Desc.TSFlags;

  // Determine where the memory operand starts, if present.
  int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
  if (MemoryOperand != -1)
    MemoryOperand += X86II::getOperandBias(Desc);

  unsigned SegmentReg = 0;
  if (MemoryOperand >= 0) {
    // Check for explicit segment override on memory operand.
    SegmentReg = Inst.getOperand(MemoryOperand + X86::AddrSegmentReg).getReg();
  }

  switch (TSFlags & X86II::FormMask) {
  default:
    break;
  case X86II::RawFrmDstSrc: {
    // Check segment override opcode prefix as needed (not for %ds).
    if (Inst.getOperand(2).getReg() != X86::DS)
      SegmentReg = Inst.getOperand(2).getReg();
    break;
  }
  case X86II::RawFrmSrc: {
    // Check segment override opcode prefix as needed (not for %ds).
    if (Inst.getOperand(1).getReg() != X86::DS)
      SegmentReg = Inst.getOperand(1).getReg();
    break;
  }
  case X86II::RawFrmMemOffs: {
    // Check segment override opcode prefix as needed.
    SegmentReg = Inst.getOperand(1).getReg();
    break;
  }
  }

  if (SegmentReg != 0)
    return X86::getSegmentOverridePrefixForReg(SegmentReg);

  if (STI.hasFeature(X86::Is64Bit))
    return X86::CS_Encoding;

  if (MemoryOperand >= 0) {
    unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg;
    unsigned BaseReg = Inst.getOperand(BaseRegNum).getReg();
    if (BaseReg == X86::ESP || BaseReg == X86::EBP)
      return X86::SS_Encoding;
  }
  return X86::DS_Encoding;
}

/// Check if the two instructions will be macro-fused on the target CPU.
bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const {
  const MCInstrDesc &InstDesc = MCII->get(Jcc.getOpcode());
  if (!InstDesc.isConditionalBranch())
    return false;
  if (!isFirstMacroFusibleInst(Cmp, *MCII))
    return false;
  const X86::FirstMacroFusionInstKind CmpKind =
      X86::classifyFirstOpcodeInMacroFusion(Cmp.getOpcode());
  const X86::SecondMacroFusionInstKind BranchKind =
      classifySecondInstInMacroFusion(Jcc, *MCII);
  return X86::isMacroFused(CmpKind, BranchKind);
}

/// Check if the instruction has a variant symbol operand.
static bool hasVariantSymbol(const MCInst &MI) {
  for (auto &Operand : MI) {
    if (!Operand.isExpr())
      continue;
    const MCExpr &Expr = *Operand.getExpr();
    if (Expr.getKind() == MCExpr::SymbolRef &&
        cast<MCSymbolRefExpr>(Expr).getKind() != MCSymbolRefExpr::VK_None)
      return true;
  }
  return false;
}

bool X86AsmBackend::allowAutoPadding() const {
  return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone);
}

bool X86AsmBackend::allowEnhancedRelaxation() const {
  return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign;
}

/// X86 has certain instructions which enable interrupts exactly one
/// instruction *after* the instruction which stores to SS. Return true if the
/// given instruction has such an interrupt delay slot.
static bool hasInterruptDelaySlot(const MCInst &Inst) {
  switch (Inst.getOpcode()) {
  case X86::POPSS16:
  case X86::POPSS32:
  case X86::STI:
    return true;

  case X86::MOV16sr:
  case X86::MOV32sr:
  case X86::MOV64sr:
  case X86::MOV16sm:
    if (Inst.getOperand(0).getReg() == X86::SS)
      return true;
    break;
  }
  return false;
}

/// Check if the instruction to be emitted is right after any data.
static bool
isRightAfterData(MCFragment *CurrentFragment,
                 const std::pair<MCFragment *, size_t> &PrevInstPosition) {
  MCFragment *F = CurrentFragment;
  // Empty data fragments may be created to prevent further data being
  // added into the previous fragment; we need to skip them since they
  // have no contents.
  for (; isa_and_nonnull<MCDataFragment>(F); F = F->getPrevNode())
    if (cast<MCDataFragment>(F)->getContents().size() != 0)
      break;

  // Since data is always emitted into a DataFragment, our check strategy is
  // simple here.
  //   - If the fragment is a DataFragment
  //     - If it's not the fragment where the previous instruction is,
  //       returns true.
  //     - If it's the fragment holding the previous instruction but its
  //       size changed since the previous instruction was emitted into
  //       it, returns true.
  //     - Otherwise returns false.
  //   - If the fragment is not a DataFragment, returns false.
  if (auto *DF = dyn_cast_or_null<MCDataFragment>(F))
    return DF != PrevInstPosition.first ||
           DF->getContents().size() != PrevInstPosition.second;

  return false;
}

/// \returns the fragment size if it has instructions, otherwise returns 0.
static size_t getSizeForInstFragment(const MCFragment *F) {
  if (!F || !F->hasInstructions())
    return 0;
  // MCEncodedFragmentWithContents being templated makes this tricky.
  switch (F->getKind()) {
  default:
    llvm_unreachable("Unknown fragment with instructions!");
  case MCFragment::FT_Data:
    return cast<MCDataFragment>(*F).getContents().size();
  case MCFragment::FT_Relaxable:
    return cast<MCRelaxableFragment>(*F).getContents().size();
  case MCFragment::FT_CompactEncodedInst:
    return cast<MCCompactEncodedInstFragment>(*F).getContents().size();
  }
}

/// Return true if we can insert NOPs or prefixes automatically before the
/// instruction to be emitted.
bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const {
  if (hasVariantSymbol(Inst))
    // The linker may rewrite an instruction with a variant symbol operand
    // (e.g. TLSCALL).
    return false;

  if (hasInterruptDelaySlot(PrevInst))
    // If this instruction follows an interrupt enabling instruction with a one
    // instruction delay, inserting a nop would change behavior.
    return false;

  if (isPrefix(PrevInst, *MCII))
    // If this instruction follows a prefix, inserting a nop/prefix would
    // change semantics.
    return false;

  if (isPrefix(Inst, *MCII))
    // If this instruction is a prefix, inserting a prefix would change
    // semantics.
    return false;

  if (isRightAfterData(OS.getCurrentFragment(), PrevInstPosition))
    // If this instruction follows any data, there is no clear
    // instruction boundary, and inserting a nop/prefix would change semantics.
    return false;

  return true;
}

bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const {
  if (!OS.getAllowAutoPadding())
    return false;
  assert(allowAutoPadding() && "incorrect initialization!");

  // We only pad in the text section.
  if (!OS.getCurrentSectionOnly()->getKind().isText())
    return false;

  // TODO: Currently we don't deal with Bundle cases.
  if (OS.getAssembler().isBundlingEnabled())
    return false;

  // Branches only need to be aligned in 32-bit or 64-bit mode.
  if (!(STI.hasFeature(X86::Is64Bit) || STI.hasFeature(X86::Is32Bit)))
    return false;

  return true;
}

/// Check if the instruction needs to be aligned.
bool X86AsmBackend::needAlign(const MCInst &Inst) const {
  const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
  return (Desc.isConditionalBranch() &&
          (AlignBranchType & X86::AlignBranchJcc)) ||
         (Desc.isUnconditionalBranch() &&
          (AlignBranchType & X86::AlignBranchJmp)) ||
         (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) ||
         (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) ||
         (Desc.isIndirectBranch() &&
          (AlignBranchType & X86::AlignBranchIndirect));
}

/// Insert BoundaryAlignFragment before instructions to align branches.
void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
                                         const MCInst &Inst,
                                         const MCSubtargetInfo &STI) {
  CanPadInst = canPadInst(Inst, OS);

  if (!canPadBranches(OS))
    return;

  if (!isMacroFused(PrevInst, Inst))
    // Macro fusion doesn't actually happen; clear the pending fragment.
    PendingBA = nullptr;

  if (!CanPadInst)
    return;

  if (PendingBA && OS.getCurrentFragment()->getPrevNode() == PendingBA) {
    // Macro fusion actually happens and there is no other fragment inserted
    // after the previous instruction.
    //
    // Do nothing here since we already inserted a BoundaryAlign fragment when
    // we met the first instruction in the fused pair and we'll tie them
    // together in emitInstructionEnd.
    //
    // Note: When there is at least one fragment, such as MCAlignFragment,
    // inserted after the previous instruction, e.g.
    //
    // \code
    //   cmp %rax %rcx
    //   .align 16
    //   je .Label0
    // \endcode
    //
    // we will treat the JCC as an unfused branch although it may be fused
    // with the CMP.
    return;
  }

  if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) &&
                          isFirstMacroFusibleInst(Inst, *MCII))) {
    // If we meet an unfused branch or the first instruction in a fusible
    // pair, insert a BoundaryAlign fragment.
    OS.insert(PendingBA = new MCBoundaryAlignFragment(AlignBoundary, STI));
  }
}

/// Set the last fragment to be aligned for the BoundaryAlignFragment.
void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS,
                                       const MCInst &Inst) {
  PrevInst = Inst;
  MCFragment *CF = OS.getCurrentFragment();
  PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF));
  if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF))
    F->setAllowAutoPadding(CanPadInst);

  if (!canPadBranches(OS))
    return;

  if (!needAlign(Inst) || !PendingBA)
    return;

  // Tie the aligned instructions into a pending BoundaryAlign.
  PendingBA->setLastFragment(CF);
  PendingBA = nullptr;

  // We need to ensure that further data isn't added to the current
  // DataFragment, so that we can get the size of instructions later in
  // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty
  // DataFragment.
  if (isa_and_nonnull<MCDataFragment>(CF))
    OS.insert(new MCDataFragment());

  // Update the maximum alignment on the current section if necessary.
  MCSection *Sec = OS.getCurrentSectionOnly();
  Sec->ensureMinAlignment(AlignBoundary);
}

std::optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const {
  if (STI.getTargetTriple().isOSBinFormatELF()) {
    unsigned Type;
    if (STI.getTargetTriple().getArch() == Triple::x86_64) {
      Type = llvm::StringSwitch<unsigned>(Name)
#define ELF_RELOC(X, Y) .Case(#X, Y)
#include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
#undef ELF_RELOC
                 .Case("BFD_RELOC_NONE", ELF::R_X86_64_NONE)
                 .Case("BFD_RELOC_8", ELF::R_X86_64_8)
                 .Case("BFD_RELOC_16", ELF::R_X86_64_16)
                 .Case("BFD_RELOC_32", ELF::R_X86_64_32)
                 .Case("BFD_RELOC_64", ELF::R_X86_64_64)
                 .Default(-1u);
    } else {
      Type = llvm::StringSwitch<unsigned>(Name)
#define ELF_RELOC(X, Y) .Case(#X, Y)
#include "llvm/BinaryFormat/ELFRelocs/i386.def"
#undef ELF_RELOC
                 .Case("BFD_RELOC_NONE", ELF::R_386_NONE)
                 .Case("BFD_RELOC_8", ELF::R_386_8)
                 .Case("BFD_RELOC_16", ELF::R_386_16)
                 .Case("BFD_RELOC_32", ELF::R_386_32)
                 .Default(-1u);
    }
    if (Type == -1u)
      return std::nullopt;
    return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
  }
  return MCAsmBackend::getFixupKind(Name);
}

const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
  const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
      {"reloc_riprel_4byte", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_riprel_4byte_movq_load", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_riprel_4byte_relax", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_riprel_4byte_relax_rex", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_signed_4byte", 0, 32, 0},
      {"reloc_signed_4byte_relax", 0, 32, 0},
      {"reloc_global_offset_table", 0, 32, 0},
      {"reloc_global_offset_table8", 0, 64, 0},
      {"reloc_branch_4byte_pcrel", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
  };

  // Fixup kinds from .reloc directive are like R_386_NONE/R_X86_64_NONE. They
  // do not require any extra processing.
  if (Kind >= FirstLiteralRelocationKind)
    return MCAsmBackend::getFixupKindInfo(FK_NONE);

  if (Kind < FirstTargetFixupKind)
    return MCAsmBackend::getFixupKindInfo(Kind);

  assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
         "Invalid kind!");
  assert(Infos[Kind - FirstTargetFixupKind].Name && "Empty fixup name!");
  return Infos[Kind - FirstTargetFixupKind];
}

bool X86AsmBackend::shouldForceRelocation(const MCAssembler &,
                                          const MCFixup &Fixup,
                                          const MCValue &) {
  return Fixup.getKind() >= FirstLiteralRelocationKind;
}

static unsigned getFixupKindSize(unsigned Kind) {
  switch (Kind) {
  default:
    llvm_unreachable("invalid fixup kind!");
  case FK_NONE:
    return 0;
  case FK_PCRel_1:
  case FK_SecRel_1:
  case FK_Data_1:
    return 1;
  case FK_PCRel_2:
  case FK_SecRel_2:
  case FK_Data_2:
    return 2;
  case FK_PCRel_4:
  case X86::reloc_riprel_4byte:
  case X86::reloc_riprel_4byte_relax:
  case X86::reloc_riprel_4byte_relax_rex:
  case X86::reloc_riprel_4byte_movq_load:
  case X86::reloc_signed_4byte:
  case X86::reloc_signed_4byte_relax:
  case X86::reloc_global_offset_table:
  case X86::reloc_branch_4byte_pcrel:
  case FK_SecRel_4:
  case FK_Data_4:
    return 4;
  case FK_PCRel_8:
  case FK_SecRel_8:
  case FK_Data_8:
  case X86::reloc_global_offset_table8:
    return 8;
  }
}

void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
                               const MCValue &Target,
                               MutableArrayRef<char> Data,
                               uint64_t Value, bool IsResolved,
                               const MCSubtargetInfo *STI) const {
  unsigned Kind = Fixup.getKind();
  if (Kind >= FirstLiteralRelocationKind)
    return;
  unsigned Size = getFixupKindSize(Kind);

  assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");

  int64_t SignedValue = static_cast<int64_t>(Value);
  if ((Target.isAbsolute() || IsResolved) &&
      getFixupKindInfo(Fixup.getKind()).Flags &
          MCFixupKindInfo::FKF_IsPCRel) {
    // Check that the PC-relative fixup fits into the fixup size.
    if (Size > 0 && !isIntN(Size * 8, SignedValue))
      Asm.getContext().reportError(
          Fixup.getLoc(), "value of " + Twine(SignedValue) +
                              " is too large for field of " + Twine(Size) +
                              ((Size == 1) ? " byte." : " bytes."));
  } else {
    // Check that the upper bits are either all zeros or all ones.
    // Specifically ignore overflow/underflow as long as the leakage is
    // limited to the lower bits. This is to remain compatible with
    // other assemblers.
    assert((Size == 0 || isIntN(Size * 8 + 1, SignedValue)) &&
           "Value does not fit in the Fixup field");
  }

  for (unsigned i = 0; i != Size; ++i)
    Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
}

bool X86AsmBackend::mayNeedRelaxation(const MCInst &Inst,
                                      const MCSubtargetInfo &STI) const {
  // Branches can always be relaxed in either mode.
  if (getRelaxedOpcodeBranch(Inst, false) != Inst.getOpcode())
    return true;

  // Check if this instruction is ever relaxable.
  if (getRelaxedOpcodeArith(Inst) == Inst.getOpcode())
    return false;

  // Check if the relaxable operand has an expression. For the current set of
  // relaxable instructions, the relaxable operand is always the last operand.
  unsigned RelaxableOp = Inst.getNumOperands() - 1;
  if (Inst.getOperand(RelaxableOp).isExpr())
    return true;

  return false;
}

bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
                                         uint64_t Value,
                                         const MCRelaxableFragment *DF,
                                         const MCAsmLayout &Layout) const {
  // Relax if the value is too big for a (signed) i8.
  return !isInt<8>(Value);
}

// FIXME: Can tblgen help at all here to verify there aren't other instructions
// we can relax?
void X86AsmBackend::relaxInstruction(MCInst &Inst,
                                     const MCSubtargetInfo &STI) const {
  // The only relaxation X86 does is from a 1-byte pcrel to a 4-byte pcrel.
  bool Is16BitMode = STI.getFeatureBits()[X86::Is16Bit];
  unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode);

  if (RelaxedOp == Inst.getOpcode()) {
    SmallString<256> Tmp;
    raw_svector_ostream OS(Tmp);
    Inst.dump_pretty(OS);
    OS << "\n";
    report_fatal_error("unexpected instruction to relax: " + OS.str());
  }

  Inst.setOpcode(RelaxedOp);
}

/// Return true if this instruction has been fully relaxed into its most
/// general available form.
static bool isFullyRelaxed(const MCRelaxableFragment &RF) {
  auto &Inst = RF.getInst();
  auto &STI = *RF.getSubtargetInfo();
  bool Is16BitMode = STI.getFeatureBits()[X86::Is16Bit];
  return getRelaxedOpcode(Inst, Is16BitMode) == Inst.getOpcode();
}

bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
                                            MCCodeEmitter &Emitter,
                                            unsigned &RemainingSize) const {
  if (!RF.getAllowAutoPadding())
    return false;
  // If the instruction isn't fully relaxed, shifting it around might require a
  // larger value for one of the fixups than can be encoded. The outer loop
  // will also catch this before moving to the next instruction, but we need to
  // prevent padding this single instruction as well.
  if (!isFullyRelaxed(RF))
    return false;

  const unsigned OldSize = RF.getContents().size();
  if (OldSize == 15)
    return false;

  const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize);
  const unsigned RemainingPrefixSize = [&]() -> unsigned {
    SmallString<15> Code;
    raw_svector_ostream VecOS(Code);
    Emitter.emitPrefix(RF.getInst(), VecOS, STI);
    assert(Code.size() < 15 && "The number of prefixes must be less than 15.");

    // TODO: It turns out we need a decent amount of plumbing for the
    // target-specific bits to determine the number of prefixes it is safe to
    // add. Various targets (older chips mostly, but also the Atom family)
    // encounter decoder stalls with too many prefixes. For testing purposes,
    // we set the value externally for the moment.
    unsigned ExistingPrefixSize = Code.size();
    if (TargetPrefixMax <= ExistingPrefixSize)
      return 0;
    return TargetPrefixMax - ExistingPrefixSize;
  }();
  const unsigned PrefixBytesToAdd =
      std::min(MaxPossiblePad, RemainingPrefixSize);
  if (PrefixBytesToAdd == 0)
    return false;

  const uint8_t Prefix = determinePaddingPrefix(RF.getInst());

  SmallString<256> Code;
  Code.append(PrefixBytesToAdd, Prefix);
  Code.append(RF.getContents().begin(), RF.getContents().end());
  RF.getContents() = Code;

  // Adjust the fixups for the change in offsets.
  for (auto &F : RF.getFixups()) {
    F.setOffset(F.getOffset() + PrefixBytesToAdd);
  }

  RemainingSize -= PrefixBytesToAdd;
  return true;
}

bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
                                                MCCodeEmitter &Emitter,
                                                unsigned &RemainingSize) const {
  if (isFullyRelaxed(RF))
    // TODO: There are lots of other tricks we could apply for increasing
    // encoding size without impacting performance.
    return false;

  MCInst Relaxed = RF.getInst();
  relaxInstruction(Relaxed, *RF.getSubtargetInfo());

  SmallVector<MCFixup, 4> Fixups;
  SmallString<15> Code;
  raw_svector_ostream VecOS(Code);
  Emitter.encodeInstruction(Relaxed, VecOS, Fixups, *RF.getSubtargetInfo());
  const unsigned OldSize = RF.getContents().size();
  const unsigned NewSize = Code.size();
  assert(NewSize >= OldSize && "size decrease during relaxation?");
  unsigned Delta = NewSize - OldSize;
  if (Delta > RemainingSize)
    return false;
  RF.setInst(Relaxed);
  RF.getContents() = Code;
  RF.getFixups() = Fixups;
  RemainingSize -= Delta;
  return true;
}

bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
                                           MCCodeEmitter &Emitter,
                                           unsigned &RemainingSize) const {
  bool Changed = false;
  if (RemainingSize != 0)
    Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize);
  if (RemainingSize != 0)
    Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize);
  return Changed;
}

void X86AsmBackend::finishLayout(MCAssembler const &Asm,
                                 MCAsmLayout &Layout) const {
  // See if we can further relax some instructions to cut down on the number of
  // nop bytes required for code alignment. The actual win is in reducing
  // instruction count, not number of bytes. Modern X86-64 can easily end up
  // decode limited. It is often better to reduce the number of instructions
  // (i.e. eliminate nops) even at the cost of increasing the size and
  // complexity of others.
  if (!X86PadForAlign && !X86PadForBranchAlign)
    return;

  // The processed regions are delimited by LabeledFragments. -g may have more
  // MCSymbols and therefore different relaxation results. X86PadForAlign is
  // disabled by default to eliminate the -g vs non -g difference.
  DenseSet<MCFragment *> LabeledFragments;
  for (const MCSymbol &S : Asm.symbols())
    LabeledFragments.insert(S.getFragment(false));

  for (MCSection &Sec : Asm) {
    if (!Sec.getKind().isText())
      continue;

    SmallVector<MCRelaxableFragment *, 4> Relaxable;
    for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) {
      MCFragment &F = *I;

      if (LabeledFragments.count(&F))
        Relaxable.clear();

      if (F.getKind() == MCFragment::FT_Data ||
          F.getKind() == MCFragment::FT_CompactEncodedInst)
        // Skip and ignore
        continue;

      if (F.getKind() == MCFragment::FT_Relaxable) {
        auto &RF = cast<MCRelaxableFragment>(*I);
        Relaxable.push_back(&RF);
        continue;
      }

      auto canHandle = [](MCFragment &F) -> bool {
        switch (F.getKind()) {
        default:
          return false;
        case MCFragment::FT_Align:
          return X86PadForAlign;
        case MCFragment::FT_BoundaryAlign:
          return X86PadForBranchAlign;
        }
      };
      // For any unhandled kind, assume we can't change layout.
      if (!canHandle(F)) {
        Relaxable.clear();
        continue;
      }

#ifndef NDEBUG
      const uint64_t OrigOffset = Layout.getFragmentOffset(&F);
#endif
      const uint64_t OrigSize = Asm.computeFragmentSize(Layout, F);

      // To keep the effects local, prefer to relax instructions closest to
      // the align directive. This is purely about human understandability
      // of the resulting code. If we later find a reason to expand
      // particular instructions over others, we can adjust.
      MCFragment *FirstChangedFragment = nullptr;
      unsigned RemainingSize = OrigSize;
      while (!Relaxable.empty() && RemainingSize != 0) {
        auto &RF = *Relaxable.pop_back_val();
        // Give the backend a chance to play any tricks it wishes to increase
        // the encoding size of the given instruction. Target independent code
        // will try further relaxation, but targets may play further tricks.
        if (padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize))
          FirstChangedFragment = &RF;

        // If we have an instruction which hasn't been fully relaxed, we can't
        // skip past it and insert bytes before it. Changing its starting
        // offset might require a larger negative offset than it can encode.
        // We don't need to worry about larger positive offsets as none of the
        // possible offsets between this and our align are visible, and the
        // ones afterwards aren't changing.
        if (!isFullyRelaxed(RF))
          break;
      }
      Relaxable.clear();

      if (FirstChangedFragment) {
        // Make sure the offsets for any fragments in the affected range get
        // updated. Note that this (conservatively) invalidates the offsets of
        // those following, but this is not required.
        Layout.invalidateFragmentsFrom(FirstChangedFragment);
      }

      // BoundaryAlign explicitly tracks its size (unlike align).
      if (F.getKind() == MCFragment::FT_BoundaryAlign)
        cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize);

#ifndef NDEBUG
      const uint64_t FinalOffset = Layout.getFragmentOffset(&F);
      const uint64_t FinalSize = Asm.computeFragmentSize(Layout, F);
      assert(OrigOffset + OrigSize == FinalOffset + FinalSize &&
             "can't move start of next fragment!");
      assert(FinalSize == RemainingSize && "inconsistent size computation?");
#endif

      // If we're looking at a boundary align, make sure we don't try to pad
      // its target instructions for some following directive.
      // Doing so would break the alignment of the current boundary align.
      if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(&F)) {
        const MCFragment *LastFragment = BF->getLastFragment();
        if (!LastFragment)
          continue;
        while (&*I != LastFragment)
          ++I;
      }
    }
  }

  // The layout is done. Mark every fragment as valid.
  for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
    MCSection &Section = *Layout.getSectionOrder()[i];
    Layout.getFragmentOffset(&*Section.getFragmentList().rbegin());
    Asm.computeFragmentSize(Layout, *Section.getFragmentList().rbegin());
  }
}

unsigned X86AsmBackend::getMaximumNopSize(const MCSubtargetInfo &STI) const {
  if (STI.hasFeature(X86::Is16Bit))
    return 4;
  if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Is64Bit))
    return 1;
  if (STI.getFeatureBits()[X86::TuningFast7ByteNOP])
    return 7;
  if (STI.getFeatureBits()[X86::TuningFast15ByteNOP])
    return 15;
  if (STI.getFeatureBits()[X86::TuningFast11ByteNOP])
    return 11;
  // FIXME: handle 32-bit mode
  // 15-bytes is the longest single NOP instruction, but 10-bytes is
  // commonly the longest that can be efficiently decoded.
  return 10;
}

/// Write a sequence of optimal nops to the output, covering \p Count
/// bytes.
/// \return - true on success, false on failure
bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
                                 const MCSubtargetInfo *STI) const {

  // Write 1 or 2 byte NOP sequences, or a longer trapsled, until
  // we have written Count bytes.
  do {
    const uint8_t ThisNopLength = (uint8_t) std::min(Count, (uint64_t)127);
    switch (ThisNopLength) {
    case 0: break;
    case 1: OS << '\x90';
      break;
    case 2: OS << '\x66';
      OS << '\x90';
      break;
    default: OS << '\xEB';
      OS << (uint8_t)(ThisNopLength - 2);
      for (uint8_t i = 2; i < ThisNopLength; ++i)
        OS << '\xCC';
    }
    Count -= ThisNopLength;
  } while (Count != 0);

  return true;
}

/* *** */

namespace {

class ELFX86AsmBackend : public X86AsmBackend {
public:
  uint8_t OSABI;
  ELFX86AsmBackend(const Target &T, uint8_t OSABI, const MCSubtargetInfo &STI)
      : X86AsmBackend(T, STI), OSABI(OSABI) {}
};

class ELFX86_32AsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_32AsmBackend(const Target &T, uint8_t OSABI,
                      const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, ELF::EM_386);
  }
};

class ELFX86_X32AsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI,
                       const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
                                    ELF::EM_X86_64);
  }
};

class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI,
                         const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/
        false, OSABI, ELF::EM_IAMCU);
  }
};

class ELFX86_64AsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_64AsmBackend(const Target &T, uint8_t OSABI,
                      const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ true, OSABI, ELF::EM_X86_64);
  }
};

class WindowsX86AsmBackend : public X86AsmBackend {
  bool Is64Bit;

public:
  WindowsX86AsmBackend(const Target &T, bool is64Bit,
                       const MCSubtargetInfo &STI)
      : X86AsmBackend(T, STI), Is64Bit(is64Bit) {}

  std::optional<MCFixupKind> getFixupKind(StringRef Name) const override {
    return StringSwitch<std::optional<MCFixupKind>>(Name)
        .Case("dir32", FK_Data_4)
        .Case("secrel32", FK_SecRel_4)
        .Case("secidx", FK_SecRel_2)
        .Default(MCAsmBackend::getFixupKind(Name));
  }

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86WinCOFFObjectWriter(Is64Bit);
  }
};

namespace CU {

/// Compact unwind encoding values.
enum CompactUnwindEncodings {
  /// [RE]BP based frame where [RE]BP is pushed on the stack immediately after
  /// the return address, then [RE]SP is moved to [RE]BP.
  UNWIND_MODE_BP_FRAME = 0x01000000,

  /// A frameless function with a small constant stack size.
  UNWIND_MODE_STACK_IMMD = 0x02000000,

  /// A frameless function with a large constant stack size.
  UNWIND_MODE_STACK_IND = 0x03000000,

  /// No compact unwind encoding is available.
  UNWIND_MODE_DWARF = 0x04000000,

  /// Mask for encoding the frame registers.
  UNWIND_BP_FRAME_REGISTERS = 0x00007FFF,

  /// Mask for encoding the frameless registers.
  UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
};

} // namespace CU

class DarwinX86AsmBackend : public X86AsmBackend {
  const MCRegisterInfo &MRI;

  /// Number of registers that can be saved in a compact unwind encoding.
  enum { CU_NUM_SAVED_REGS = 6 };

  mutable unsigned SavedRegs[CU_NUM_SAVED_REGS];
  Triple TT;
  bool Is64Bit;

  unsigned OffsetSize;   ///< Offset of a "push" instruction.
  unsigned MoveInstrSize; ///< Size of a "move" instruction.
  unsigned StackDivide;   ///< Amount to adjust stack size by.
protected:
  /// Size of a "push" instruction for the given register.
  unsigned PushInstrSize(unsigned Reg) const {
    switch (Reg) {
    case X86::EBX:
    case X86::ECX:
    case X86::EDX:
    case X86::EDI:
    case X86::ESI:
    case X86::EBP:
    case X86::RBX:
    case X86::RBP:
      return 1;
    case X86::R12:
    case X86::R13:
    case X86::R14:
    case X86::R15:
      return 2;
    }
    return 1;
  }

private:
  /// Get the compact unwind number for a given register. The number
  /// corresponds to the enum lists in compact_unwind_encoding.h.
  int getCompactUnwindRegNum(unsigned Reg) const {
    static const MCPhysReg CU32BitRegs[7] = {
        X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
    };
    static const MCPhysReg CU64BitRegs[] = {
        X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
    };
    const MCPhysReg *CURegs = Is64Bit ?
        CU64BitRegs : CU32BitRegs;
    for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
      if (*CURegs == Reg)
        return Idx;

    return -1;
  }

  /// Return the registers encoded for a compact encoding with a frame
  /// pointer.
  uint32_t encodeCompactUnwindRegistersWithFrame() const {
    // Encode the registers in the order they were saved --- 3-bits per
    // register. The list of saved registers is assumed to be in reverse
    // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS.
    uint32_t RegEnc = 0;
    for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) {
      unsigned Reg = SavedRegs[i];
      if (Reg == 0) break;

      int CURegNum = getCompactUnwindRegNum(Reg);
      if (CURegNum == -1) return ~0U;

      // Encode the 3-bit register number in order, skipping over 3-bits for
      // each register.
      RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
    }

    assert((RegEnc & 0x3FFFF) == RegEnc &&
           "Invalid compact register encoding!");
    return RegEnc;
  }

  /// Create the permutation encoding used with frameless stacks. It is
  /// passed the number of registers to be saved and an array of the registers
  /// saved.
  uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const {
    // The saved registers are numbered from 1 to 6. In order to encode the
    // order in which they were saved, we re-number them according to their
    // place in the register order. The re-numbering is relative to the last
    // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in
    // that order:
    //
    //    Orig  Re-Num
    //    ----  ------
    //     6       6
    //     2       2
    //     4       3
    //     5       3
    //
    for (unsigned i = 0; i < RegCount; ++i) {
      int CUReg = getCompactUnwindRegNum(SavedRegs[i]);
      if (CUReg == -1) return ~0U;
      SavedRegs[i] = CUReg;
    }

    // Reverse the list.
    std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]);

    uint32_t RenumRegs[CU_NUM_SAVED_REGS];
    for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i) {
      unsigned Countless = 0;
      for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
        if (SavedRegs[j] < SavedRegs[i])
          ++Countless;

      RenumRegs[i] = SavedRegs[i] - Countless - 1;
    }

    // Take the renumbered values and encode them into a 10-bit number.
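    // The multipliers below form a variable-base (factorial) positional
    // encoding of the register ordering: with at most six saved registers
    // there are 6! = 720 possible orderings, which is why the result always
    // fits in the 10-bit permutation field checked by the assert below.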
    uint32_t permutationEncoding = 0;
    switch (RegCount) {
    case 6:
      permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
                             + 6 * RenumRegs[2] + 2 * RenumRegs[3]
                             + RenumRegs[4];
      break;
    case 5:
      permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
                             + 6 * RenumRegs[3] + 2 * RenumRegs[4]
                             + RenumRegs[5];
      break;
    case 4:
      permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3]
                             + 3 * RenumRegs[4] + RenumRegs[5];
      break;
    case 3:
      permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4]
                             + RenumRegs[5];
      break;
    case 2:
      permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5];
      break;
    case 1:
      permutationEncoding |= RenumRegs[5];
      break;
    }

    assert((permutationEncoding & 0x3FF) == permutationEncoding &&
           "Invalid compact register encoding!");
    return permutationEncoding;
  }

public:
  DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI,
                      const MCSubtargetInfo &STI)
      : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()),
        Is64Bit(TT.isArch64Bit()) {
    memset(SavedRegs, 0, sizeof(SavedRegs));
    OffsetSize = Is64Bit ? 8 : 4;
    MoveInstrSize = Is64Bit ? 3 : 2;
    StackDivide = Is64Bit ? 8 : 4;
  }

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    uint32_t CPUType = cantFail(MachO::getCPUType(TT));
    uint32_t CPUSubType = cantFail(MachO::getCPUSubType(TT));
    return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubType);
  }

  /// Implementation of algorithm to generate the compact unwind encoding
  /// for the CFI instructions.
  uint32_t generateCompactUnwindEncoding(
      ArrayRef<MCCFIInstruction> Instrs) const override {
    if (Instrs.empty()) return 0;

    // Reset the saved registers.
    unsigned SavedRegIdx = 0;
    memset(SavedRegs, 0, sizeof(SavedRegs));

    bool HasFP = false;

    // Encode that we are using EBP/RBP as the frame pointer.
    uint32_t CompactUnwindEncoding = 0;

    unsigned SubtractInstrIdx = Is64Bit ? 3 : 2;
    unsigned InstrOffset = 0;
    unsigned StackAdjust = 0;
    unsigned StackSize = 0;
    int MinAbsOffset = std::numeric_limits<int>::max();

    for (const MCCFIInstruction &Inst : Instrs) {
      switch (Inst.getOperation()) {
      default:
        // Any other CFI directives indicate a frame that we aren't prepared
        // to represent via compact unwind, so just bail out.
        return CU::UNWIND_MODE_DWARF;
      case MCCFIInstruction::OpDefCfaRegister: {
        // Defines a frame pointer. E.g.
        //
        //     movq %rsp, %rbp
        //  L0:
        //     .cfi_def_cfa_register %rbp
        //
        HasFP = true;

        // If the frame pointer is other than esp/rsp, we do not have a way to
        // generate a compact unwinding representation, so bail out.
        if (*MRI.getLLVMRegNum(Inst.getRegister(), true) !=
            (Is64Bit ? X86::RBP : X86::EBP))
          return CU::UNWIND_MODE_DWARF;

        // Reset the counts.
        memset(SavedRegs, 0, sizeof(SavedRegs));
        StackAdjust = 0;
        SavedRegIdx = 0;
        MinAbsOffset = std::numeric_limits<int>::max();
        InstrOffset += MoveInstrSize;
        break;
      }
      case MCCFIInstruction::OpDefCfaOffset: {
        // Defines a new offset for the CFA. E.g.
        //
        //  With frame:
        //
        //     pushq %rbp
        //  L0:
        //     .cfi_def_cfa_offset 16
        //
        //  Without frame:
        //
        //     subq $72, %rsp
        //  L0:
        //     .cfi_def_cfa_offset 80
        //
        StackSize = Inst.getOffset() / StackDivide;
        break;
      }
      case MCCFIInstruction::OpOffset: {
        // Defines a "push" of a callee-saved register. E.g.
        //
        //     pushq %r15
        //     pushq %r14
        //     pushq %rbx
        //  L0:
        //     subq $120, %rsp
        //  L1:
        //     .cfi_offset %rbx, -40
        //     .cfi_offset %r14, -32
        //     .cfi_offset %r15, -24
        //
        if (SavedRegIdx == CU_NUM_SAVED_REGS)
          // If there are too many saved registers, we cannot use a compact
          // unwind encoding.
          return CU::UNWIND_MODE_DWARF;

        unsigned Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true);
        SavedRegs[SavedRegIdx++] = Reg;
        StackAdjust += OffsetSize;
        MinAbsOffset = std::min(MinAbsOffset, abs(Inst.getOffset()));
        InstrOffset += PushInstrSize(Reg);
        break;
      }
      }
    }

    StackAdjust /= StackDivide;

    if (HasFP) {
      if ((StackAdjust & 0xFF) != StackAdjust)
        // Offset was too big for a compact unwind encoding.
        return CU::UNWIND_MODE_DWARF;

      // We don't attempt to track a real StackAdjust, so if the saved
      // registers aren't adjacent to rbp we can't cope.
      if (SavedRegIdx != 0 && MinAbsOffset != 3 * (int)OffsetSize)
        return CU::UNWIND_MODE_DWARF;

      // Get the encoding of the saved registers when we have a frame pointer.
      uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame();
      if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;

      CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME;
      CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
      CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
    } else {
      SubtractInstrIdx += InstrOffset;
      ++StackAdjust;

      if ((StackSize & 0xFF) == StackSize) {
        // Frameless stack with a small stack size.
        CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD;

        // Encode the stack size.
        CompactUnwindEncoding |= (StackSize & 0xFF) << 16;
      } else {
        if ((StackAdjust & 0x7) != StackAdjust)
          // The extra stack adjustments are too big for us to handle.
          return CU::UNWIND_MODE_DWARF;

        // Frameless stack with an offset too large for us to encode compactly.
        CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND;

        // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
        // instruction.
        CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;

        // Encode any extra stack adjustments (done via push instructions).
        CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
      }

      // Encode the number of registers saved. (Reverse the list first.)
      std::reverse(&SavedRegs[0], &SavedRegs[SavedRegIdx]);
      CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;

      // Get the encoding of the saved registers when we don't have a frame
      // pointer.
      uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegIdx);
      if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;

      // Encode the register encoding.
      CompactUnwindEncoding |=
          RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION;
    }

    return CompactUnwindEncoding;
  }
};

} // end anonymous namespace

MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
                                           const MCSubtargetInfo &STI,
                                           const MCRegisterInfo &MRI,
                                           const MCTargetOptions &Options) {
  const Triple &TheTriple = STI.getTargetTriple();
  if (TheTriple.isOSBinFormatMachO())
    return new DarwinX86AsmBackend(T, MRI, STI);

  if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
    return new WindowsX86AsmBackend(T, false, STI);

  uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());

  if (TheTriple.isOSIAMCU())
    return new ELFX86_IAMCUAsmBackend(T, OSABI, STI);

  return new ELFX86_32AsmBackend(T, OSABI, STI);
}

MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
                                           const MCSubtargetInfo &STI,
                                           const MCRegisterInfo &MRI,
                                           const MCTargetOptions &Options) {
  const Triple &TheTriple = STI.getTargetTriple();
  if (TheTriple.isOSBinFormatMachO())
    return new DarwinX86AsmBackend(T, MRI, STI);

  if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
    return new WindowsX86AsmBackend(T, true, STI);

  uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());

  if (TheTriple.isX32())
    return new ELFX86_X32AsmBackend(T, OSABI, STI);
  return new ELFX86_64AsmBackend(T, OSABI, STI);
}