//===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86FixupKinds.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

namespace {
/// A wrapper for holding a mask of the values from X86::AlignBranchBoundaryKind
class X86AlignBranchKind {
private:
  uint8_t AlignBranchKind = 0;

public:
  void operator=(const std::string &Val) {
    if (Val.empty())
      return;
    SmallVector<StringRef, 6> BranchTypes;
    StringRef(Val).split(BranchTypes, '+', -1, false);
    for (auto BranchType : BranchTypes) {
      if (BranchType == "fused")
        addKind(X86::AlignBranchFused);
      else if (BranchType == "jcc")
        addKind(X86::AlignBranchJcc);
      else if (BranchType == "jmp")
        addKind(X86::AlignBranchJmp);
      else if (BranchType == "call")
        addKind(X86::AlignBranchCall);
      else if (BranchType == "ret")
        addKind(X86::AlignBranchRet);
      else if (BranchType == "indirect")
        addKind(X86::AlignBranchIndirect);
      else {
        errs() << "invalid argument " << BranchType.str()
               << " to -x86-align-branch=; each element must be one of: fused, "
                  "jcc, jmp, call, ret, indirect (plus-separated)\n";
      }
    }
  }
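
  // For illustration: assigning the string "fused+jcc" sets the mask to
  // X86::AlignBranchFused | X86::AlignBranchJcc.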

  operator uint8_t() const { return AlignBranchKind; }
  void addKind(X86::AlignBranchBoundaryKind Value) { AlignBranchKind |= Value; }
};

X86AlignBranchKind X86AlignBranchKindLoc;

cl::opt<unsigned> X86AlignBranchBoundary(
    "x86-align-branch-boundary", cl::init(0),
    cl::desc(
        "Control how the assembler should align branches with NOP. If the "
        "boundary's size is not 0, it should be a power of 2 and no less "
        "than 32. Branches will be aligned so that they neither cross nor "
        "end at a boundary of the specified size. The default value 0 "
        "does not align branches."));

cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch(
    "x86-align-branch",
    cl::desc(
        "Specify types of branches to align (plus separated list of types):"
        "\njcc      indicates conditional jumps"
        "\nfused    indicates fused conditional jumps"
        "\njmp      indicates direct unconditional jumps"
        "\ncall     indicates direct and indirect calls"
        "\nret      indicates rets"
        "\nindirect indicates indirect unconditional jumps"),
    cl::location(X86AlignBranchKindLoc));
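
// A typical (hypothetical) invocation combining the two options:
//   llvm-mc -x86-align-branch-boundary=32 -x86-align-branch=fused+jcc+jmp foo.s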

cl::opt<bool> X86AlignBranchWithin32BBoundaries(
    "x86-branches-within-32B-boundaries", cl::init(false),
    cl::desc(
        "Align selected instructions to mitigate negative performance impact "
        "of Intel's microcode update for erratum SKX102. May break "
        "assumptions about labels corresponding to particular instructions, "
        "and should be used with caution."));

cl::opt<unsigned> X86PadMaxPrefixSize(
    "x86-pad-max-prefix-size", cl::init(0),
    cl::desc("Maximum number of prefixes to use for padding"));

cl::opt<bool> X86PadForAlign(
    "x86-pad-for-align", cl::init(true), cl::Hidden,
    cl::desc("Pad previous instructions to implement align directives"));

cl::opt<bool> X86PadForBranchAlign(
    "x86-pad-for-branch-align", cl::init(true), cl::Hidden,
    cl::desc("Pad previous instructions to implement branch alignment"));

class X86ELFObjectWriter : public MCELFObjectTargetWriter {
public:
  X86ELFObjectWriter(bool is64Bit, uint8_t OSABI, uint16_t EMachine,
                     bool HasRelocationAddend)
      : MCELFObjectTargetWriter(is64Bit, OSABI, EMachine, HasRelocationAddend) {
  }
};

class X86AsmBackend : public MCAsmBackend {
  const MCSubtargetInfo &STI;
  std::unique_ptr<const MCInstrInfo> MCII;
  X86AlignBranchKind AlignBranchType;
  Align AlignBoundary;
  unsigned TargetPrefixMax = 0;

  MCInst PrevInst;
  MCBoundaryAlignFragment *PendingBA = nullptr;
  std::pair<MCFragment *, size_t> PrevInstPosition;
  bool CanPadInst;

  uint8_t determinePaddingPrefix(const MCInst &Inst) const;
  bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
  bool needAlign(const MCInst &Inst) const;
  bool canPadBranches(MCObjectStreamer &OS) const;
  bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const;

public:
  X86AsmBackend(const Target &T, const MCSubtargetInfo &STI)
      : MCAsmBackend(support::little), STI(STI),
        MCII(T.createMCInstrInfo()) {
    if (X86AlignBranchWithin32BBoundaries) {
      // At the moment, this defaults to aligning fused branches, unconditional
      // jumps, and (unfused) conditional jumps with nops. Both the
      // instructions aligned and the alignment method (nop vs prefix) may
      // change in the future.
      AlignBoundary = assumeAligned(32);
      AlignBranchType.addKind(X86::AlignBranchFused);
      AlignBranchType.addKind(X86::AlignBranchJcc);
      AlignBranchType.addKind(X86::AlignBranchJmp);
    }
    // Allow overriding the defaults set by the master flag.
    if (X86AlignBranchBoundary.getNumOccurrences())
      AlignBoundary = assumeAligned(X86AlignBranchBoundary);
    if (X86AlignBranch.getNumOccurrences())
      AlignBranchType = X86AlignBranchKindLoc;
    if (X86PadMaxPrefixSize.getNumOccurrences())
      TargetPrefixMax = X86PadMaxPrefixSize;
  }

  bool allowAutoPadding() const override;
  bool allowEnhancedRelaxation() const override;
  void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst) override;
  void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) override;

  unsigned getNumFixupKinds() const override {
    return X86::NumTargetFixupKinds;
  }

  Optional<MCFixupKind> getFixupKind(StringRef Name) const override;

  const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;

  bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
                             const MCValue &Target) override;

  void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
                  const MCValue &Target, MutableArrayRef<char> Data,
                  uint64_t Value, bool IsResolved,
                  const MCSubtargetInfo *STI) const override;

  bool mayNeedRelaxation(const MCInst &Inst,
                         const MCSubtargetInfo &STI) const override;

  bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
                            const MCRelaxableFragment *DF,
                            const MCAsmLayout &Layout) const override;

  void relaxInstruction(MCInst &Inst,
                        const MCSubtargetInfo &STI) const override;

  bool padInstructionViaRelaxation(MCRelaxableFragment &RF,
                                   MCCodeEmitter &Emitter,
                                   unsigned &RemainingSize) const;

  bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
                               unsigned &RemainingSize) const;

  bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
                              unsigned &RemainingSize) const;

  void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const override;

  bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
};
} // end anonymous namespace

static unsigned getRelaxedOpcodeBranch(const MCInst &Inst, bool Is16BitMode) {
  unsigned Op = Inst.getOpcode();
  switch (Op) {
  default:
    return Op;
  case X86::JCC_1:
    return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4;
  case X86::JMP_1:
    return (Is16BitMode) ? X86::JMP_2 : X86::JMP_4;
  }
}

static unsigned getRelaxedOpcodeArith(const MCInst &Inst) {
  unsigned Op = Inst.getOpcode();
  switch (Op) {
  default:
    return Op;

  // IMUL
  case X86::IMUL16rri8: return X86::IMUL16rri;
  case X86::IMUL16rmi8: return X86::IMUL16rmi;
  case X86::IMUL32rri8: return X86::IMUL32rri;
  case X86::IMUL32rmi8: return X86::IMUL32rmi;
  case X86::IMUL64rri8: return X86::IMUL64rri32;
  case X86::IMUL64rmi8: return X86::IMUL64rmi32;

  // AND
  case X86::AND16ri8: return X86::AND16ri;
  case X86::AND16mi8: return X86::AND16mi;
  case X86::AND32ri8: return X86::AND32ri;
  case X86::AND32mi8: return X86::AND32mi;
  case X86::AND64ri8: return X86::AND64ri32;
  case X86::AND64mi8: return X86::AND64mi32;

  // OR
  case X86::OR16ri8: return X86::OR16ri;
  case X86::OR16mi8: return X86::OR16mi;
  case X86::OR32ri8: return X86::OR32ri;
  case X86::OR32mi8: return X86::OR32mi;
  case X86::OR64ri8: return X86::OR64ri32;
  case X86::OR64mi8: return X86::OR64mi32;

  // XOR
  case X86::XOR16ri8: return X86::XOR16ri;
  case X86::XOR16mi8: return X86::XOR16mi;
  case X86::XOR32ri8: return X86::XOR32ri;
  case X86::XOR32mi8: return X86::XOR32mi;
  case X86::XOR64ri8: return X86::XOR64ri32;
  case X86::XOR64mi8: return X86::XOR64mi32;

  // ADD
  case X86::ADD16ri8: return X86::ADD16ri;
  case X86::ADD16mi8: return X86::ADD16mi;
  case X86::ADD32ri8: return X86::ADD32ri;
  case X86::ADD32mi8: return X86::ADD32mi;
  case X86::ADD64ri8: return X86::ADD64ri32;
  case X86::ADD64mi8: return X86::ADD64mi32;

  // ADC
  case X86::ADC16ri8: return X86::ADC16ri;
  case X86::ADC16mi8: return X86::ADC16mi;
  case X86::ADC32ri8: return X86::ADC32ri;
  case X86::ADC32mi8: return X86::ADC32mi;
  case X86::ADC64ri8: return X86::ADC64ri32;
  case X86::ADC64mi8: return X86::ADC64mi32;

  // SUB
  case X86::SUB16ri8: return X86::SUB16ri;
  case X86::SUB16mi8: return X86::SUB16mi;
  case X86::SUB32ri8: return X86::SUB32ri;
  case X86::SUB32mi8: return X86::SUB32mi;
  case X86::SUB64ri8: return X86::SUB64ri32;
  case X86::SUB64mi8: return X86::SUB64mi32;

  // SBB
  case X86::SBB16ri8: return X86::SBB16ri;
  case X86::SBB16mi8: return X86::SBB16mi;
  case X86::SBB32ri8: return X86::SBB32ri;
  case X86::SBB32mi8: return X86::SBB32mi;
  case X86::SBB64ri8: return X86::SBB64ri32;
  case X86::SBB64mi8: return X86::SBB64mi32;

  // CMP
  case X86::CMP16ri8: return X86::CMP16ri;
  case X86::CMP16mi8: return X86::CMP16mi;
  case X86::CMP32ri8: return X86::CMP32ri;
  case X86::CMP32mi8: return X86::CMP32mi;
  case X86::CMP64ri8: return X86::CMP64ri32;
  case X86::CMP64mi8: return X86::CMP64mi32;

  // PUSH
  case X86::PUSH32i8: return X86::PUSHi32;
  case X86::PUSH16i8: return X86::PUSHi16;
  case X86::PUSH64i8: return X86::PUSH64i32;
  }
}

static unsigned getRelaxedOpcode(const MCInst &Inst, bool Is16BitMode) {
  unsigned R = getRelaxedOpcodeArith(Inst);
  if (R != Inst.getOpcode())
    return R;
  return getRelaxedOpcodeBranch(Inst, Is16BitMode);
}
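
// For illustration: a short unconditional jump relaxes from the 2-byte
// "jmp rel8" form (opcode 0xEB) to the 5-byte "jmp rel32" form (opcode 0xE9),
// and imm8 arithmetic forms such as "addl $1, %eax" (0x83 /0 ib) relax to
// their imm32 counterparts (0x81 /0 id).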

static X86::CondCode getCondFromBranch(const MCInst &MI,
                                       const MCInstrInfo &MCII) {
  unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  default:
    return X86::COND_INVALID;
  case X86::JCC_1: {
    const MCInstrDesc &Desc = MCII.get(Opcode);
    return static_cast<X86::CondCode>(
        MI.getOperand(Desc.getNumOperands() - 1).getImm());
  }
  }
}

static X86::SecondMacroFusionInstKind
classifySecondInstInMacroFusion(const MCInst &MI, const MCInstrInfo &MCII) {
  X86::CondCode CC = getCondFromBranch(MI, MCII);
  return classifySecondCondCodeInMacroFusion(CC);
}

/// Check if the instruction uses RIP relative addressing.
static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) {
  unsigned Opcode = MI.getOpcode();
  const MCInstrDesc &Desc = MCII.get(Opcode);
  uint64_t TSFlags = Desc.TSFlags;
  unsigned CurOp = X86II::getOperandBias(Desc);
  int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
  if (MemoryOperand < 0)
    return false;
  unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg;
  unsigned BaseReg = MI.getOperand(BaseRegNum).getReg();
  return (BaseReg == X86::RIP);
}

/// Check if the instruction is a prefix.
static bool isPrefix(const MCInst &MI, const MCInstrInfo &MCII) {
  return X86II::isPrefix(MCII.get(MI.getOpcode()).TSFlags);
}

/// Check if the instruction is valid as the first instruction in macro fusion.
static bool isFirstMacroFusibleInst(const MCInst &Inst,
                                    const MCInstrInfo &MCII) {
  // An Intel instruction with RIP relative addressing is not macro fusible.
  if (isRIPRelative(Inst, MCII))
    return false;
  X86::FirstMacroFusionInstKind FIK =
      X86::classifyFirstOpcodeInMacroFusion(Inst.getOpcode());
  return FIK != X86::FirstMacroFusionInstKind::Invalid;
}

/// X86 can reduce the bytes of NOP by padding instructions with prefixes to
/// get a better performance in some cases. Here, we determine which prefix is
/// the most suitable.
///
/// If the instruction has a segment override prefix, use the existing one.
/// If the target is 64-bit, use CS.
/// If the target is 32-bit,
///   - If the instruction has an ESP/EBP base register, use SS.
///   - Otherwise use DS.
uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const {
  assert((STI.hasFeature(X86::Mode32Bit) || STI.hasFeature(X86::Mode64Bit)) &&
         "Prefixes can be added only in 32-bit or 64-bit mode.");
  const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
  uint64_t TSFlags = Desc.TSFlags;

  // Determine where the memory operand starts, if present.
  int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
  if (MemoryOperand != -1)
    MemoryOperand += X86II::getOperandBias(Desc);

  unsigned SegmentReg = 0;
  if (MemoryOperand >= 0) {
    // Check for explicit segment override on memory operand.
    SegmentReg = Inst.getOperand(MemoryOperand + X86::AddrSegmentReg).getReg();
  }

  switch (TSFlags & X86II::FormMask) {
  default:
    break;
  case X86II::RawFrmDstSrc: {
    // Check segment override opcode prefix as needed (not for %ds).
    if (Inst.getOperand(2).getReg() != X86::DS)
      SegmentReg = Inst.getOperand(2).getReg();
    break;
  }
  case X86II::RawFrmSrc: {
    // Check segment override opcode prefix as needed (not for %ds).
    if (Inst.getOperand(1).getReg() != X86::DS)
      SegmentReg = Inst.getOperand(1).getReg();
    break;
  }
  case X86II::RawFrmMemOffs: {
    // Check segment override opcode prefix as needed.
    SegmentReg = Inst.getOperand(1).getReg();
    break;
  }
  }

  if (SegmentReg != 0)
    return X86::getSegmentOverridePrefixForReg(SegmentReg);

  if (STI.hasFeature(X86::Mode64Bit))
    return X86::CS_Encoding;

  if (MemoryOperand >= 0) {
    unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg;
    unsigned BaseReg = Inst.getOperand(BaseRegNum).getReg();
    if (BaseReg == X86::ESP || BaseReg == X86::EBP)
      return X86::SS_Encoding;
  }
  return X86::DS_Encoding;
}
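
// For illustration: padding "movl %eax, (%esp)" in 32-bit mode would pick the
// SS override (0x36), which is a no-op because ESP-based accesses already
// default to the SS segment.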

/// Check if the two instructions will be macro-fused on the target CPU.
bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const {
  const MCInstrDesc &InstDesc = MCII->get(Jcc.getOpcode());
  if (!InstDesc.isConditionalBranch())
    return false;
  if (!isFirstMacroFusibleInst(Cmp, *MCII))
    return false;
  const X86::FirstMacroFusionInstKind CmpKind =
      X86::classifyFirstOpcodeInMacroFusion(Cmp.getOpcode());
  const X86::SecondMacroFusionInstKind BranchKind =
      classifySecondInstInMacroFusion(Jcc, *MCII);
  return X86::isMacroFused(CmpKind, BranchKind);
}

/// Check if the instruction has a variant symbol operand.
static bool hasVariantSymbol(const MCInst &MI) {
  for (auto &Operand : MI) {
    if (!Operand.isExpr())
      continue;
    const MCExpr &Expr = *Operand.getExpr();
    if (Expr.getKind() == MCExpr::SymbolRef &&
        cast<MCSymbolRefExpr>(Expr).getKind() != MCSymbolRefExpr::VK_None)
      return true;
  }
  return false;
}

bool X86AsmBackend::allowAutoPadding() const {
  return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone);
}

bool X86AsmBackend::allowEnhancedRelaxation() const {
  return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign;
}

/// X86 has certain instructions which enable interrupts exactly one
/// instruction *after* the instruction which stores to SS. Return true if the
/// given instruction has such an interrupt delay slot.
static bool hasInterruptDelaySlot(const MCInst &Inst) {
  switch (Inst.getOpcode()) {
  case X86::POPSS16:
  case X86::POPSS32:
  case X86::STI:
    return true;

  case X86::MOV16sr:
  case X86::MOV32sr:
  case X86::MOV64sr:
  case X86::MOV16sm:
    if (Inst.getOperand(0).getReg() == X86::SS)
      return true;
    break;
  }
  return false;
}
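
// For illustration: "mov %ax, %ss" inhibits interrupts until the following
// instruction completes, so padding a nop in between would move that
// protection onto the nop rather than the intended instruction.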

/// Check if the instruction to be emitted is right after any data.
static bool
isRightAfterData(MCFragment *CurrentFragment,
                 const std::pair<MCFragment *, size_t> &PrevInstPosition) {
  MCFragment *F = CurrentFragment;
  // Empty data fragments may be created to prevent further data being
  // added into the previous fragment; we need to skip them since they
  // have no contents.
  for (; isa_and_nonnull<MCDataFragment>(F); F = F->getPrevNode())
    if (cast<MCDataFragment>(F)->getContents().size() != 0)
      break;

  // Since data is always emitted into a DataFragment, our check strategy is
  // simple here.
  //   - If the fragment is a DataFragment
  //     - If it's not the fragment where the previous instruction is,
  //       returns true.
  //     - If it's the fragment holding the previous instruction but its
  //       size changed since the previous instruction was emitted into
  //       it, returns true.
  //     - Otherwise returns false.
  //   - If the fragment is not a DataFragment, returns false.
  if (auto *DF = dyn_cast_or_null<MCDataFragment>(F))
    return DF != PrevInstPosition.first ||
           DF->getContents().size() != PrevInstPosition.second;

  return false;
}

/// \returns the fragment size if it has instructions, otherwise returns 0.
static size_t getSizeForInstFragment(const MCFragment *F) {
  if (!F || !F->hasInstructions())
    return 0;
  // MCEncodedFragmentWithContents being templated makes this tricky.
  switch (F->getKind()) {
  default:
    llvm_unreachable("Unknown fragment with instructions!");
  case MCFragment::FT_Data:
    return cast<MCDataFragment>(*F).getContents().size();
  case MCFragment::FT_Relaxable:
    return cast<MCRelaxableFragment>(*F).getContents().size();
  case MCFragment::FT_CompactEncodedInst:
    return cast<MCCompactEncodedInstFragment>(*F).getContents().size();
  }
}

/// Return true if we can insert NOP or prefixes automatically before the
/// instruction to be emitted.
bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const {
  if (hasVariantSymbol(Inst))
    // The linker may rewrite the instruction with a variant symbol operand
    // (e.g. TLSCALL).
    return false;

  if (hasInterruptDelaySlot(PrevInst))
    // If this instruction follows an interrupt enabling instruction with a one
    // instruction delay, inserting a nop would change behavior.
    return false;

  if (isPrefix(PrevInst, *MCII))
    // If this instruction follows a prefix, inserting a nop/prefix would
    // change semantics.
    return false;

  if (isPrefix(Inst, *MCII))
    // If this instruction is a prefix, inserting a prefix would change
    // semantics.
    return false;

  if (isRightAfterData(OS.getCurrentFragment(), PrevInstPosition))
    // If this instruction follows any data, there is no clear instruction
    // boundary; inserting a nop/prefix would change semantics.
    return false;

  return true;
}

bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const {
  if (!OS.getAllowAutoPadding())
    return false;
  assert(allowAutoPadding() && "incorrect initialization!");

  // We only pad in the text section.
  if (!OS.getCurrentSectionOnly()->getKind().isText())
    return false;

  // TODO: Currently we don't deal with bundle cases.
  if (OS.getAssembler().isBundlingEnabled())
    return false;

  // Branches only need to be aligned in 32-bit or 64-bit mode.
  if (!(STI.hasFeature(X86::Mode64Bit) || STI.hasFeature(X86::Mode32Bit)))
    return false;

  return true;
}

/// Check if the instruction needs to be aligned.
bool X86AsmBackend::needAlign(const MCInst &Inst) const {
  const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
  return (Desc.isConditionalBranch() &&
          (AlignBranchType & X86::AlignBranchJcc)) ||
         (Desc.isUnconditionalBranch() &&
          (AlignBranchType & X86::AlignBranchJmp)) ||
         (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) ||
         (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) ||
         (Desc.isIndirectBranch() &&
          (AlignBranchType & X86::AlignBranchIndirect));
}

/// Insert BoundaryAlignFragment before instructions to align branches.
void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
                                         const MCInst &Inst) {
  CanPadInst = canPadInst(Inst, OS);

  if (!canPadBranches(OS))
    return;

  if (!isMacroFused(PrevInst, Inst))
    // Macro fusion doesn't actually happen, so clear the pending fragment.
    PendingBA = nullptr;

  if (!CanPadInst)
    return;

  if (PendingBA && OS.getCurrentFragment()->getPrevNode() == PendingBA) {
    // Macro fusion actually happens and there is no other fragment inserted
    // after the previous instruction.
    //
    // Do nothing here since we already inserted a BoundaryAlign fragment when
    // we met the first instruction in the fused pair and we'll tie them
    // together in emitInstructionEnd.
    //
    // Note: When there is at least one fragment, such as MCAlignFragment,
    // inserted after the previous instruction, e.g.
    //
    // \code
    //   cmp %rax %rcx
    //   .align 16
    //   je .Label0
    // \endcode
    //
    // we will treat the JCC as an unfused branch although it may be fused
    // with the CMP.
    return;
  }

  if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) &&
                          isFirstMacroFusibleInst(Inst, *MCII))) {
    // If we meet an unfused branch or the first instruction in a fusible
    // pair, insert a BoundaryAlign fragment.
    OS.insert(PendingBA = new MCBoundaryAlignFragment(AlignBoundary));
  }
}

/// Set the last fragment to be aligned for the BoundaryAlignFragment.
void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS,
                                       const MCInst &Inst) {
  PrevInst = Inst;
  MCFragment *CF = OS.getCurrentFragment();
  PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF));
  if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF))
    F->setAllowAutoPadding(CanPadInst);

  if (!canPadBranches(OS))
    return;

  if (!needAlign(Inst) || !PendingBA)
    return;

  // Tie the aligned instructions into a pending BoundaryAlign.
  PendingBA->setLastFragment(CF);
  PendingBA = nullptr;

  // We need to ensure that further data isn't added to the current
  // DataFragment, so that we can get the size of instructions later in
  // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty
  // DataFragment.
  if (isa_and_nonnull<MCDataFragment>(CF))
    OS.insert(new MCDataFragment());

  // Update the maximum alignment on the current section if necessary.
  MCSection *Sec = OS.getCurrentSectionOnly();
  if (AlignBoundary.value() > Sec->getAlignment())
    Sec->setAlignment(AlignBoundary);
}

Optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const {
  if (STI.getTargetTriple().isOSBinFormatELF()) {
    unsigned Type;
    if (STI.getTargetTriple().getArch() == Triple::x86_64) {
      Type = llvm::StringSwitch<unsigned>(Name)
#define ELF_RELOC(X, Y) .Case(#X, Y)
#include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
#undef ELF_RELOC
                 .Default(-1u);
    } else {
      Type = llvm::StringSwitch<unsigned>(Name)
#define ELF_RELOC(X, Y) .Case(#X, Y)
#include "llvm/BinaryFormat/ELFRelocs/i386.def"
#undef ELF_RELOC
                 .Default(-1u);
    }
    if (Type == -1u)
      return None;
    return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
  }
  return MCAsmBackend::getFixupKind(Name);
}

const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
  const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
      {"reloc_riprel_4byte", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_riprel_4byte_movq_load", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_riprel_4byte_relax", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_riprel_4byte_relax_rex", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_signed_4byte", 0, 32, 0},
      {"reloc_signed_4byte_relax", 0, 32, 0},
      {"reloc_global_offset_table", 0, 32, 0},
      {"reloc_global_offset_table8", 0, 64, 0},
      {"reloc_branch_4byte_pcrel", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
  };

  // Fixup kinds from .reloc directive are like R_386_NONE/R_X86_64_NONE. They
  // do not require any extra processing.
  if (Kind >= FirstLiteralRelocationKind)
    return MCAsmBackend::getFixupKindInfo(FK_NONE);

  if (Kind < FirstTargetFixupKind)
    return MCAsmBackend::getFixupKindInfo(Kind);

  assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
         "Invalid kind!");
  assert(Infos[Kind - FirstTargetFixupKind].Name && "Empty fixup name!");
  return Infos[Kind - FirstTargetFixupKind];
}

bool X86AsmBackend::shouldForceRelocation(const MCAssembler &,
                                          const MCFixup &Fixup,
                                          const MCValue &) {
  return Fixup.getKind() >= FirstLiteralRelocationKind;
}

static unsigned getFixupKindSize(unsigned Kind) {
  switch (Kind) {
  default:
    llvm_unreachable("invalid fixup kind!");
  case FK_NONE:
    return 0;
  case FK_PCRel_1:
  case FK_SecRel_1:
  case FK_Data_1:
    return 1;
  case FK_PCRel_2:
  case FK_SecRel_2:
  case FK_Data_2:
    return 2;
  case FK_PCRel_4:
  case X86::reloc_riprel_4byte:
  case X86::reloc_riprel_4byte_relax:
  case X86::reloc_riprel_4byte_relax_rex:
  case X86::reloc_riprel_4byte_movq_load:
  case X86::reloc_signed_4byte:
  case X86::reloc_signed_4byte_relax:
  case X86::reloc_global_offset_table:
  case X86::reloc_branch_4byte_pcrel:
  case FK_SecRel_4:
  case FK_Data_4:
    return 4;
  case FK_PCRel_8:
  case FK_SecRel_8:
  case FK_Data_8:
  case X86::reloc_global_offset_table8:
    return 8;
  }
}

void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
                               const MCValue &Target,
                               MutableArrayRef<char> Data,
                               uint64_t Value, bool IsResolved,
                               const MCSubtargetInfo *STI) const {
  unsigned Kind = Fixup.getKind();
  if (Kind >= FirstLiteralRelocationKind)
    return;
  unsigned Size = getFixupKindSize(Kind);

  assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");

  int64_t SignedValue = static_cast<int64_t>(Value);
  if ((Target.isAbsolute() || IsResolved) &&
      getFixupKindInfo(Fixup.getKind()).Flags &
          MCFixupKindInfo::FKF_IsPCRel) {
    // Check that the PC relative fixup fits into the fixup size.
    if (Size > 0 && !isIntN(Size * 8, SignedValue))
      Asm.getContext().reportError(
          Fixup.getLoc(), "value of " + Twine(SignedValue) +
                              " is too large for field of " + Twine(Size) +
                              ((Size == 1) ? " byte." : " bytes."));
  } else {
    // Check that the upper bits are either all zeros or all ones.
    // Specifically ignore overflow/underflow as long as the leakage is
    // limited to the lower bits. This is to remain compatible with
    // other assemblers.
    assert((Size == 0 || isIntN(Size * 8 + 1, SignedValue)) &&
           "Value does not fit in the Fixup field");
  }

  for (unsigned i = 0; i != Size; ++i)
    Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
}
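
// For illustration: applying a 4-byte fixup whose resolved Value is
// 0x12345678 at offset O writes the bytes 78 56 34 12 (little endian) into
// Data[O..O+3].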

bool X86AsmBackend::mayNeedRelaxation(const MCInst &Inst,
                                      const MCSubtargetInfo &STI) const {
  // Branches can always be relaxed in either mode.
  if (getRelaxedOpcodeBranch(Inst, false) != Inst.getOpcode())
    return true;

  // Check if this instruction is ever relaxable.
  if (getRelaxedOpcodeArith(Inst) == Inst.getOpcode())
    return false;

  // Check if the relaxable operand has an expression. For the current set of
  // relaxable instructions, the relaxable operand is always the last operand.
  unsigned RelaxableOp = Inst.getNumOperands() - 1;
  if (Inst.getOperand(RelaxableOp).isExpr())
    return true;

  return false;
}

bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
                                         uint64_t Value,
                                         const MCRelaxableFragment *DF,
                                         const MCAsmLayout &Layout) const {
  // Relax if the value is too big for a (signed) i8.
  return !isInt<8>(Value);
}

// FIXME: Can tblgen help at all here to verify there aren't other instructions
// we can relax?
void X86AsmBackend::relaxInstruction(MCInst &Inst,
                                     const MCSubtargetInfo &STI) const {
  // The only relaxation X86 does is from a 1-byte pcrel to a 4-byte pcrel.
  bool Is16BitMode = STI.getFeatureBits()[X86::Mode16Bit];
  unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode);

  if (RelaxedOp == Inst.getOpcode()) {
    SmallString<256> Tmp;
    raw_svector_ostream OS(Tmp);
    Inst.dump_pretty(OS);
    OS << "\n";
    report_fatal_error("unexpected instruction to relax: " + OS.str());
  }

  Inst.setOpcode(RelaxedOp);
}

/// Return true if this instruction has been fully relaxed into its most
/// general available form.
static bool isFullyRelaxed(const MCRelaxableFragment &RF) {
  auto &Inst = RF.getInst();
  auto &STI = *RF.getSubtargetInfo();
  bool Is16BitMode = STI.getFeatureBits()[X86::Mode16Bit];
  return getRelaxedOpcode(Inst, Is16BitMode) == Inst.getOpcode();
}

bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
                                            MCCodeEmitter &Emitter,
                                            unsigned &RemainingSize) const {
  if (!RF.getAllowAutoPadding())
    return false;
  // If the instruction isn't fully relaxed, shifting it around might require a
  // larger value for one of the fixups than can be encoded. The outer loop
  // will also catch this before moving to the next instruction, but we need to
  // prevent padding this single instruction as well.
  if (!isFullyRelaxed(RF))
    return false;

  const unsigned OldSize = RF.getContents().size();
  if (OldSize == 15)
    return false;

  const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize);
  const unsigned RemainingPrefixSize = [&]() -> unsigned {
    SmallString<15> Code;
    raw_svector_ostream VecOS(Code);
    Emitter.emitPrefix(RF.getInst(), VecOS, STI);
    assert(Code.size() < 15 && "The number of prefixes must be less than 15.");

    // TODO: It turns out we need a decent amount of plumbing for the target
    // specific bits to determine the number of prefixes it's safe to add.
    // Various targets (older chips mostly, but also Atom family) encounter
    // decoder stalls with too many prefixes. For testing purposes, we set the
    // value externally for the moment.
    unsigned ExistingPrefixSize = Code.size();
    if (TargetPrefixMax <= ExistingPrefixSize)
      return 0;
    return TargetPrefixMax - ExistingPrefixSize;
  }();
  const unsigned PrefixBytesToAdd =
      std::min(MaxPossiblePad, RemainingPrefixSize);
  if (PrefixBytesToAdd == 0)
    return false;

  const uint8_t Prefix = determinePaddingPrefix(RF.getInst());

  SmallString<256> Code;
  Code.append(PrefixBytesToAdd, Prefix);
  Code.append(RF.getContents().begin(), RF.getContents().end());
  RF.getContents() = Code;

  // Adjust the fixups for the change in offsets.
  for (auto &F : RF.getFixups()) {
    F.setOffset(F.getOffset() + PrefixBytesToAdd);
  }

  RemainingSize -= PrefixBytesToAdd;
  return true;
}
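
// For illustration (with a hypothetical -x86-pad-max-prefix-size=5): a 3-byte
// instruction with no existing prefixes that still needs 4 bytes of padding
// grows by min(15 - 3, 4, 5 - 0) = 4 copies of the chosen prefix byte.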

bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
                                                MCCodeEmitter &Emitter,
                                                unsigned &RemainingSize) const {
  if (isFullyRelaxed(RF))
    // TODO: There are lots of other tricks we could apply for increasing
    // encoding size without impacting performance.
    return false;

  MCInst Relaxed = RF.getInst();
  relaxInstruction(Relaxed, *RF.getSubtargetInfo());

  SmallVector<MCFixup, 4> Fixups;
  SmallString<15> Code;
  raw_svector_ostream VecOS(Code);
  Emitter.encodeInstruction(Relaxed, VecOS, Fixups, *RF.getSubtargetInfo());
  const unsigned OldSize = RF.getContents().size();
  const unsigned NewSize = Code.size();
  assert(NewSize >= OldSize && "size decrease during relaxation?");
  unsigned Delta = NewSize - OldSize;
  if (Delta > RemainingSize)
    return false;
  RF.setInst(Relaxed);
  RF.getContents() = Code;
  RF.getFixups() = Fixups;
  RemainingSize -= Delta;
  return true;
}

bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
                                           MCCodeEmitter &Emitter,
                                           unsigned &RemainingSize) const {
  bool Changed = false;
  if (RemainingSize != 0)
    Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize);
  if (RemainingSize != 0)
    Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize);
  return Changed;
}

void X86AsmBackend::finishLayout(MCAssembler const &Asm,
                                 MCAsmLayout &Layout) const {
  // See if we can further relax some instructions to cut down on the number of
  // nop bytes required for code alignment. The actual win is in reducing
  // instruction count, not number of bytes. Modern X86-64 can easily end up
  // decode limited. It is often better to reduce the number of instructions
  // (i.e. eliminate nops) even at the cost of increasing the size and
  // complexity of others.
  if (!X86PadForAlign && !X86PadForBranchAlign)
    return;

  DenseSet<MCFragment *> LabeledFragments;
  for (const MCSymbol &S : Asm.symbols())
    LabeledFragments.insert(S.getFragment(false));

  for (MCSection &Sec : Asm) {
    if (!Sec.getKind().isText())
      continue;

    SmallVector<MCRelaxableFragment *, 4> Relaxable;
    for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) {
      MCFragment &F = *I;

      if (LabeledFragments.count(&F))
        Relaxable.clear();

      if (F.getKind() == MCFragment::FT_Data ||
          F.getKind() == MCFragment::FT_CompactEncodedInst)
        // Skip and ignore
        continue;

      if (F.getKind() == MCFragment::FT_Relaxable) {
        auto &RF = cast<MCRelaxableFragment>(*I);
        Relaxable.push_back(&RF);
        continue;
      }

      auto canHandle = [](MCFragment &F) -> bool {
        switch (F.getKind()) {
        default:
          return false;
        case MCFragment::FT_Align:
          return X86PadForAlign;
        case MCFragment::FT_BoundaryAlign:
          return X86PadForBranchAlign;
        }
      };
      // For any unhandled kind, assume we can't change layout.
      if (!canHandle(F)) {
        Relaxable.clear();
        continue;
      }

#ifndef NDEBUG
      const uint64_t OrigOffset = Layout.getFragmentOffset(&F);
#endif
      const uint64_t OrigSize = Asm.computeFragmentSize(Layout, F);

      // To keep the effects local, prefer to relax instructions closest to
      // the align directive. This is purely about human understandability
      // of the resulting code. If we later find a reason to expand
      // particular instructions over others, we can adjust.
      MCFragment *FirstChangedFragment = nullptr;
      unsigned RemainingSize = OrigSize;
      while (!Relaxable.empty() && RemainingSize != 0) {
        auto &RF = *Relaxable.pop_back_val();
        // Give the backend a chance to play any tricks it wishes to increase
        // the encoding size of the given instruction. Target independent code
        // will try further relaxation, but targets may play further tricks.
        if (padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize))
          FirstChangedFragment = &RF;

        // If we have an instruction which hasn't been fully relaxed, we can't
        // skip past it and insert bytes before it. Changing its starting
        // offset might require a larger negative offset than it can encode.
        // We don't need to worry about larger positive offsets as none of the
        // possible offsets between this and our align are visible, and the
        // ones afterwards aren't changing.
        if (!isFullyRelaxed(RF))
          break;
      }
      Relaxable.clear();

      if (FirstChangedFragment) {
        // Make sure the offsets for any fragments in the affected range get
        // updated. Note that this (conservatively) invalidates the offsets of
        // those following, but this is not required.
        Layout.invalidateFragmentsFrom(FirstChangedFragment);
      }

      // BoundaryAlign explicitly tracks its size (unlike align).
      if (F.getKind() == MCFragment::FT_BoundaryAlign)
        cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize);

#ifndef NDEBUG
      const uint64_t FinalOffset = Layout.getFragmentOffset(&F);
      const uint64_t FinalSize = Asm.computeFragmentSize(Layout, F);
      assert(OrigOffset + OrigSize == FinalOffset + FinalSize &&
             "can't move start of next fragment!");
      assert(FinalSize == RemainingSize && "inconsistent size computation?");
#endif

      // If we're looking at a boundary align, make sure we don't try to pad
      // its target instructions for some following directive. Doing so would
      // break the alignment of the current boundary align.
      if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(&F)) {
        const MCFragment *LastFragment = BF->getLastFragment();
        if (!LastFragment)
          continue;
        while (&*I != LastFragment)
          ++I;
      }
    }
  }

  // The layout is done. Mark every fragment as valid.
  for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
    MCSection &Section = *Layout.getSectionOrder()[i];
    Layout.getFragmentOffset(&*Section.getFragmentList().rbegin());
    Asm.computeFragmentSize(Layout, *Section.getFragmentList().rbegin());
  }
}

/// Write a sequence of optimal nops to the output, covering \p Count
/// bytes.
/// \return - true on success, false on failure
bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
  static const char Nops[10][11] = {
      // nop
      "\x90",
      // xchg %ax,%ax
      "\x66\x90",
      // nopl (%[re]ax)
      "\x0f\x1f\x00",
      // nopl 0(%[re]ax)
      "\x0f\x1f\x40\x00",
      // nopl 0(%[re]ax,%[re]ax,1)
      "\x0f\x1f\x44\x00\x00",
      // nopw 0(%[re]ax,%[re]ax,1)
      "\x66\x0f\x1f\x44\x00\x00",
      // nopl 0L(%[re]ax)
      "\x0f\x1f\x80\x00\x00\x00\x00",
      // nopl 0L(%[re]ax,%[re]ax,1)
      "\x0f\x1f\x84\x00\x00\x00\x00\x00",
      // nopw 0L(%[re]ax,%[re]ax,1)
      "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00",
      // nopw %cs:0L(%[re]ax,%[re]ax,1)
      "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00",
  };

  // This CPU doesn't support long nops. If needed add more.
  // FIXME: We could generate something better than plain 0x90.
  if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Mode64Bit)) {
    for (uint64_t i = 0; i < Count; ++i)
      OS << '\x90';
    return true;
  }

  // 15 bytes is the longest single NOP instruction, but 10 bytes is
  // commonly the longest that can be efficiently decoded.
  uint64_t MaxNopLength = 10;
  if (STI.getFeatureBits()[X86::FeatureFast7ByteNOP])
    MaxNopLength = 7;
  else if (STI.getFeatureBits()[X86::FeatureFast15ByteNOP])
    MaxNopLength = 15;
  else if (STI.getFeatureBits()[X86::FeatureFast11ByteNOP])
    MaxNopLength = 11;

  // Emit as many MaxNopLength NOPs as needed, then emit a NOP of the remaining
  // length.
  do {
    const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength);
    const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
    for (uint8_t i = 0; i < Prefixes; i++)
      OS << '\x66';
    const uint8_t Rest = ThisNopLength - Prefixes;
    if (Rest != 0)
      OS.write(Nops[Rest - 1], Rest);
    Count -= ThisNopLength;
  } while (Count != 0);

  return true;
}
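
// For illustration: a request for 13 bytes with MaxNopLength == 10 emits one
// 10-byte nop followed by the 3-byte "nopl (%[re]ax)" (0f 1f 00).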

/* *** */

namespace {

class ELFX86AsmBackend : public X86AsmBackend {
public:
  uint8_t OSABI;
  ELFX86AsmBackend(const Target &T, uint8_t OSABI, const MCSubtargetInfo &STI)
      : X86AsmBackend(T, STI), OSABI(OSABI) {}
};

class ELFX86_32AsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_32AsmBackend(const Target &T, uint8_t OSABI,
                      const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, ELF::EM_386);
  }
};

class ELFX86_X32AsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI,
                       const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
                                    ELF::EM_X86_64);
  }
};

class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI,
                         const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
                                    ELF::EM_IAMCU);
  }
};

class ELFX86_64AsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_64AsmBackend(const Target &T, uint8_t OSABI,
                      const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ true, OSABI, ELF::EM_X86_64);
  }
};

class WindowsX86AsmBackend : public X86AsmBackend {
  bool Is64Bit;

public:
  WindowsX86AsmBackend(const Target &T, bool is64Bit,
                       const MCSubtargetInfo &STI)
      : X86AsmBackend(T, STI), Is64Bit(is64Bit) {}

  Optional<MCFixupKind> getFixupKind(StringRef Name) const override {
    return StringSwitch<Optional<MCFixupKind>>(Name)
        .Case("dir32", FK_Data_4)
        .Case("secrel32", FK_SecRel_4)
        .Case("secidx", FK_SecRel_2)
        .Default(MCAsmBackend::getFixupKind(Name));
  }

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86WinCOFFObjectWriter(Is64Bit);
  }
};

namespace CU {

/// Compact unwind encoding values.
enum CompactUnwindEncodings {
  /// [RE]BP based frame where [RE]BP is pushed on the stack immediately after
  /// the return address, then [RE]SP is moved to [RE]BP.
  UNWIND_MODE_BP_FRAME = 0x01000000,

  /// A frameless function with a small constant stack size.
  UNWIND_MODE_STACK_IMMD = 0x02000000,

  /// A frameless function with a large constant stack size.
  UNWIND_MODE_STACK_IND = 0x03000000,

  /// No compact unwind encoding is available.
  UNWIND_MODE_DWARF = 0x04000000,

  /// Mask for encoding the frame registers.
  UNWIND_BP_FRAME_REGISTERS = 0x00007FFF,

  /// Mask for encoding the frameless registers.
  UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
};

} // end CU namespace

class DarwinX86AsmBackend : public X86AsmBackend {
  const MCRegisterInfo &MRI;

  /// Number of registers that can be saved in a compact unwind encoding.
  enum { CU_NUM_SAVED_REGS = 6 };

  mutable unsigned SavedRegs[CU_NUM_SAVED_REGS];
  Triple TT;
  bool Is64Bit;

  unsigned OffsetSize;    ///< Offset of a "push" instruction.
  unsigned MoveInstrSize; ///< Size of a "move" instruction.
  unsigned StackDivide;   ///< Amount to adjust stack size by.

protected:
  /// Size of a "push" instruction for the given register.
  unsigned PushInstrSize(unsigned Reg) const {
    switch (Reg) {
    case X86::EBX:
    case X86::ECX:
    case X86::EDX:
    case X86::EDI:
    case X86::ESI:
    case X86::EBP:
    case X86::RBX:
    case X86::RBP:
      return 1;
    case X86::R12:
    case X86::R13:
    case X86::R14:
    case X86::R15:
      return 2;
    }
    return 1;
  }

private:
  /// Get the compact unwind number for a given register. The number
  /// corresponds to the enum lists in compact_unwind_encoding.h.
  int getCompactUnwindRegNum(unsigned Reg) const {
    static const MCPhysReg CU32BitRegs[7] = {
        X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
    };
    static const MCPhysReg CU64BitRegs[] = {
        X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
    };
    const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
    for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
      if (*CURegs == Reg)
        return Idx;

    return -1;
  }

  /// Return the registers encoded for a compact encoding with a frame
  /// pointer.
  uint32_t encodeCompactUnwindRegistersWithFrame() const {
    // Encode the registers in the order they were saved --- 3-bits per
    // register. The list of saved registers is assumed to be in reverse
    // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS.
    uint32_t RegEnc = 0;
    for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) {
      unsigned Reg = SavedRegs[i];
      if (Reg == 0) break;

      int CURegNum = getCompactUnwindRegNum(Reg);
      if (CURegNum == -1) return ~0U;

      // Encode the 3-bit register number in order, skipping over 3-bits for
      // each register.
      RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
    }

    assert((RegEnc & 0x3FFFF) == RegEnc &&
           "Invalid compact register encoding!");
    return RegEnc;
  }
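
  // For illustration: if %rbx and then %r12 were saved (CU register numbers 1
  // and 2 in that order), the resulting encoding is 1 | (2 << 3) == 0x11.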

  /// Create the permutation encoding used with frameless stacks. It is
  /// passed the number of registers to be saved and an array of the registers
  /// saved.
  uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const {
    // The saved registers are numbered from 1 to 6. In order to encode the
    // order in which they were saved, we re-number them according to their
    // place in the register order. The re-numbering is relative to the last
    // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in
    // that order:
    //
    //    Orig  Re-Num
    //    ----  ------
    //     6       6
    //     2       2
    //     4       3
    //     5       3
    //
    for (unsigned i = 0; i < RegCount; ++i) {
      int CUReg = getCompactUnwindRegNum(SavedRegs[i]);
      if (CUReg == -1) return ~0U;
      SavedRegs[i] = CUReg;
    }

    // Reverse the list.
    std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]);

    uint32_t RenumRegs[CU_NUM_SAVED_REGS];
    for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i){
      unsigned Countless = 0;
      for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
        if (SavedRegs[j] < SavedRegs[i])
          ++Countless;

      RenumRegs[i] = SavedRegs[i] - Countless - 1;
    }

    // Take the renumbered values and encode them into a 10-bit number.
    uint32_t permutationEncoding = 0;
    switch (RegCount) {
    case 6:
      permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
                             + 6 * RenumRegs[2] +  2 * RenumRegs[3]
                             +     RenumRegs[4];
      break;
    case 5:
      permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
                             + 6 * RenumRegs[3] +  2 * RenumRegs[4]
                             +     RenumRegs[5];
      break;
    case 4:
      permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3]
                             + 3 * RenumRegs[4] +      RenumRegs[5];
      break;
    case 3:
      permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4]
                             +     RenumRegs[5];
      break;
    case 2:
      permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5];
      break;
    case 1:
      permutationEncoding |= RenumRegs[5];
      break;
    }

    assert((permutationEncoding & 0x3FF) == permutationEncoding &&
           "Invalid compact register encoding!");
    return permutationEncoding;
  }
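
  // Worked example of the code above: for CU registers saved in the order
  // {6, 2, 4, 5} (RegCount == 4), the reversed tail is {5, 4, 2, 6}, which
  // renumbers to {4, 3, 1, 2}, giving 60*4 + 12*3 + 3*1 + 2 == 281 (0x119),
  // which fits in the 10-bit permutation field.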

public:
  DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI,
                      const MCSubtargetInfo &STI)
      : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()),
        Is64Bit(TT.isArch64Bit()) {
    memset(SavedRegs, 0, sizeof(SavedRegs));
    OffsetSize = Is64Bit ? 8 : 4;
    MoveInstrSize = Is64Bit ? 3 : 2;
    StackDivide = Is64Bit ? 8 : 4;
  }

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    uint32_t CPUType = cantFail(MachO::getCPUType(TT));
    uint32_t CPUSubType = cantFail(MachO::getCPUSubType(TT));
    return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubType);
  }

  /// Implementation of algorithm to generate the compact unwind encoding
  /// for the CFI instructions.
  uint32_t generateCompactUnwindEncoding(
      ArrayRef<MCCFIInstruction> Instrs) const override {
    if (Instrs.empty()) return 0;

    // Reset the saved registers.
    unsigned SavedRegIdx = 0;
    memset(SavedRegs, 0, sizeof(SavedRegs));

    bool HasFP = false;

    // Encode that we are using EBP/RBP as the frame pointer.
    uint32_t CompactUnwindEncoding = 0;

    unsigned SubtractInstrIdx = Is64Bit ? 3 : 2;
    unsigned InstrOffset = 0;
    unsigned StackAdjust = 0;
    unsigned StackSize = 0;
    unsigned NumDefCFAOffsets = 0;

    for (unsigned i = 0, e = Instrs.size(); i != e; ++i) {
      const MCCFIInstruction &Inst = Instrs[i];

      switch (Inst.getOperation()) {
      default:
        // Any other CFI directives indicate a frame that we aren't prepared
        // to represent via compact unwind, so just bail out.
        return 0;
      case MCCFIInstruction::OpDefCfaRegister: {
        // Defines a frame pointer. E.g.
        //
        //     movq %rsp, %rbp
        //  L0:
        //     .cfi_def_cfa_register %rbp
        //
        HasFP = true;

        // If the frame pointer is other than esp/rsp, we do not have a way to
        // generate a compact unwinding representation, so bail out.
        if (*MRI.getLLVMRegNum(Inst.getRegister(), true) !=
            (Is64Bit ? X86::RBP : X86::EBP))
          return 0;

        // Reset the counts.
        memset(SavedRegs, 0, sizeof(SavedRegs));
        StackAdjust = 0;
        SavedRegIdx = 0;
        InstrOffset += MoveInstrSize;
        break;
      }
      case MCCFIInstruction::OpDefCfaOffset: {
        // Defines a new offset for the CFA. E.g.
        //
        //  With frame:
        //
        //     pushq %rbp
        //  L0:
        //     .cfi_def_cfa_offset 16
        //
        //  Without frame:
        //
        //     subq $72, %rsp
        //  L0:
        //     .cfi_def_cfa_offset 80
        //
        StackSize = Inst.getOffset() / StackDivide;
        ++NumDefCFAOffsets;
        break;
      }
      case MCCFIInstruction::OpOffset: {
        // Defines a "push" of a callee-saved register. E.g.
        //
        //     pushq %r15
        //     pushq %r14
        //     pushq %rbx
        //  L0:
        //     subq $120, %rsp
        //  L1:
        //     .cfi_offset %rbx, -40
        //     .cfi_offset %r14, -32
        //     .cfi_offset %r15, -24
        //
        if (SavedRegIdx == CU_NUM_SAVED_REGS)
          // If there are too many saved registers, we cannot use a compact
          // unwind encoding.
          return CU::UNWIND_MODE_DWARF;

        unsigned Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true);
        SavedRegs[SavedRegIdx++] = Reg;
        StackAdjust += OffsetSize;
        InstrOffset += PushInstrSize(Reg);
        break;
      }
      }
    }

    StackAdjust /= StackDivide;

    if (HasFP) {
      if ((StackAdjust & 0xFF) != StackAdjust)
        // Offset was too big for a compact unwind encoding.
        return CU::UNWIND_MODE_DWARF;

      // Get the encoding of the saved registers when we have a frame pointer.
      uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame();
      if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;

      CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME;
      CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
      CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
    } else {
      SubtractInstrIdx += InstrOffset;
      ++StackAdjust;

      if ((StackSize & 0xFF) == StackSize) {
        // Frameless stack with a small stack size.
        CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD;

        // Encode the stack size.
        CompactUnwindEncoding |= (StackSize & 0xFF) << 16;
      } else {
        if ((StackAdjust & 0x7) != StackAdjust)
          // The extra stack adjustments are too big for us to handle.
          return CU::UNWIND_MODE_DWARF;

        // Frameless stack with an offset too large for us to encode compactly.
        CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND;

        // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
        // instruction.
        CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;

        // Encode any extra stack adjustments (done via push instructions).
        CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
      }

      // Encode the number of registers saved. (Reverse the list first.)
      std::reverse(&SavedRegs[0], &SavedRegs[SavedRegIdx]);
      CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;

      // Get the encoding of the saved registers when we don't have a frame
      // pointer.
      uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegIdx);
      if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;

      // Encode the register encoding.
      CompactUnwindEncoding |=
          RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION;
    }

    return CompactUnwindEncoding;
  }
};

} // end anonymous namespace

MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
                                           const MCSubtargetInfo &STI,
                                           const MCRegisterInfo &MRI,
                                           const MCTargetOptions &Options) {
  const Triple &TheTriple = STI.getTargetTriple();
  if (TheTriple.isOSBinFormatMachO())
    return new DarwinX86AsmBackend(T, MRI, STI);

  if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
    return new WindowsX86AsmBackend(T, false, STI);

  uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());

  if (TheTriple.isOSIAMCU())
    return new ELFX86_IAMCUAsmBackend(T, OSABI, STI);

  return new ELFX86_32AsmBackend(T, OSABI, STI);
}

MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
                                           const MCSubtargetInfo &STI,
                                           const MCRegisterInfo &MRI,
                                           const MCTargetOptions &Options) {
  const Triple &TheTriple = STI.getTargetTriple();
  if (TheTriple.isOSBinFormatMachO())
    return new DarwinX86AsmBackend(T, MRI, STI);

  if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
    return new WindowsX86AsmBackend(T, true, STI);

  uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());

  if (TheTriple.getEnvironment() == Triple::GNUX32)
    return new ELFX86_X32AsmBackend(T, OSABI, STI);
  return new ELFX86_64AsmBackend(T, OSABI, STI);
}