1 //===- X86.cpp ------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "InputFiles.h"
10 #include "Symbols.h"
11 #include "SyntheticSections.h"
12 #include "Target.h"
13 #include "lld/Common/ErrorHandler.h"
14 #include "llvm/Support/Endian.h"
15 
16 using namespace llvm;
17 using namespace llvm::support::endian;
18 using namespace llvm::ELF;
19 
20 namespace lld {
21 namespace elf {
22 
23 namespace {
24 class X86 : public TargetInfo {
25 public:
26   X86();
27   int getTlsGdRelaxSkip(RelType type) const override;
28   RelExpr getRelExpr(RelType type, const Symbol &s,
29                      const uint8_t *loc) const override;
30   int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
31   void writeGotPltHeader(uint8_t *buf) const override;
32   RelType getDynRel(RelType type) const override;
33   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
34   void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
35   void writePltHeader(uint8_t *buf) const override;
36   void writePlt(uint8_t *buf, const Symbol &sym,
37                 uint64_t pltEntryAddr) const override;
38   void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override;
39 
40   RelExpr adjustRelaxExpr(RelType type, const uint8_t *data,
41                           RelExpr expr) const override;
42   void relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const override;
43   void relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const override;
44   void relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const override;
45   void relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const override;
46 };
47 } // namespace
48 
X86()49 X86::X86() {
50   copyRel = R_386_COPY;
51   gotRel = R_386_GLOB_DAT;
52   noneRel = R_386_NONE;
53   pltRel = R_386_JUMP_SLOT;
54   iRelativeRel = R_386_IRELATIVE;
55   relativeRel = R_386_RELATIVE;
56   symbolicRel = R_386_32;
57   tlsGotRel = R_386_TLS_TPOFF;
58   tlsModuleIndexRel = R_386_TLS_DTPMOD32;
59   tlsOffsetRel = R_386_TLS_DTPOFF32;
60   pltHeaderSize = 16;
61   pltEntrySize = 16;
62   ipltEntrySize = 16;
63   trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3
64 
65   // Align to the non-PAE large page size (known as a superpage or huge page).
66   // FreeBSD automatically promotes large, superpage-aligned allocations.
67   defaultImageBase = 0x400000;
68 }
69 
getTlsGdRelaxSkip(RelType type) const70 int X86::getTlsGdRelaxSkip(RelType type) const {
71   return 2;
72 }
73 
getRelExpr(RelType type,const Symbol & s,const uint8_t * loc) const74 RelExpr X86::getRelExpr(RelType type, const Symbol &s,
75                         const uint8_t *loc) const {
76   // There are 4 different TLS variable models with varying degrees of
77   // flexibility and performance. LocalExec and InitialExec models are fast but
78   // less-flexible models. If they are in use, we set DF_STATIC_TLS flag in the
79   // dynamic section to let runtime know about that.
80   if (type == R_386_TLS_LE || type == R_386_TLS_LE_32 || type == R_386_TLS_IE ||
81       type == R_386_TLS_GOTIE)
82     config->hasStaticTlsModel = true;
83 
84   switch (type) {
85   case R_386_8:
86   case R_386_16:
87   case R_386_32:
88     return R_ABS;
89   case R_386_TLS_LDO_32:
90     return R_DTPREL;
91   case R_386_TLS_GD:
92     return R_TLSGD_GOTPLT;
93   case R_386_TLS_LDM:
94     return R_TLSLD_GOTPLT;
95   case R_386_PLT32:
96     return R_PLT_PC;
97   case R_386_PC8:
98   case R_386_PC16:
99   case R_386_PC32:
100     return R_PC;
101   case R_386_GOTPC:
102     return R_GOTPLTONLY_PC;
103   case R_386_TLS_IE:
104     return R_GOT;
105   case R_386_GOT32:
106   case R_386_GOT32X:
107     // These relocations are arguably mis-designed because their calculations
108     // depend on the instructions they are applied to. This is bad because we
109     // usually don't care about whether the target section contains valid
110     // machine instructions or not. But this is part of the documented ABI, so
111     // we had to implement as the standard requires.
112     //
113     // x86 does not support PC-relative data access. Therefore, in order to
114     // access GOT contents, a GOT address needs to be known at link-time
115     // (which means non-PIC) or compilers have to emit code to get a GOT
116     // address at runtime (which means code is position-independent but
117     // compilers need to emit extra code for each GOT access.) This decision
118     // is made at compile-time. In the latter case, compilers emit code to
119     // load a GOT address to a register, which is usually %ebx.
120     //
121     // So, there are two ways to refer to symbol foo's GOT entry: foo@GOT or
122     // foo@GOT(%ebx).
123     //
124     // foo@GOT is not usable in PIC. If we are creating a PIC output and if we
125     // find such relocation, we should report an error. foo@GOT is resolved to
126     // an *absolute* address of foo's GOT entry, because both GOT address and
127     // foo's offset are known. In other words, it's G + A.
128     //
129     // foo@GOT(%ebx) needs to be resolved to a *relative* offset from a GOT to
130     // foo's GOT entry in the table, because GOT address is not known but foo's
131     // offset in the table is known. It's G + A - GOT.
132     //
133     // It's unfortunate that compilers emit the same relocation for these
134     // different use cases. In order to distinguish them, we have to read a
135     // machine instruction.
136     //
137     // The following code implements it. We assume that Loc[0] is the first byte
138     // of a displacement or an immediate field of a valid machine
139     // instruction. That means a ModRM byte is at Loc[-1]. By taking a look at
140     // the byte, we can determine whether the instruction uses the operand as an
141     // absolute address (R_GOT) or a register-relative address (R_GOTPLT).
142     return (loc[-1] & 0xc7) == 0x5 ? R_GOT : R_GOTPLT;
143   case R_386_TLS_GOTIE:
144     return R_GOTPLT;
145   case R_386_GOTOFF:
146     return R_GOTPLTREL;
147   case R_386_TLS_LE:
148     return R_TLS;
149   case R_386_TLS_LE_32:
150     return R_NEG_TLS;
151   case R_386_NONE:
152     return R_NONE;
153   default:
154     error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
155           ") against symbol " + toString(s));
156     return R_NONE;
157   }
158 }
159 
adjustRelaxExpr(RelType type,const uint8_t * data,RelExpr expr) const160 RelExpr X86::adjustRelaxExpr(RelType type, const uint8_t *data,
161                              RelExpr expr) const {
162   switch (expr) {
163   default:
164     return expr;
165   case R_RELAX_TLS_GD_TO_IE:
166     return R_RELAX_TLS_GD_TO_IE_GOTPLT;
167   case R_RELAX_TLS_GD_TO_LE:
168     return R_RELAX_TLS_GD_TO_LE_NEG;
169   }
170 }
171 
writeGotPltHeader(uint8_t * buf) const172 void X86::writeGotPltHeader(uint8_t *buf) const {
173   write32le(buf, mainPart->dynamic->getVA());
174 }
175 
writeGotPlt(uint8_t * buf,const Symbol & s) const176 void X86::writeGotPlt(uint8_t *buf, const Symbol &s) const {
177   // Entries in .got.plt initially points back to the corresponding
178   // PLT entries with a fixed offset to skip the first instruction.
179   write32le(buf, s.getPltVA() + 6);
180 }
181 
writeIgotPlt(uint8_t * buf,const Symbol & s) const182 void X86::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
183   // An x86 entry is the address of the ifunc resolver function.
184   write32le(buf, s.getVA());
185 }
186 
getDynRel(RelType type) const187 RelType X86::getDynRel(RelType type) const {
188   if (type == R_386_TLS_LE)
189     return R_386_TLS_TPOFF;
190   if (type == R_386_TLS_LE_32)
191     return R_386_TLS_TPOFF32;
192   return type;
193 }
194 
writePltHeader(uint8_t * buf) const195 void X86::writePltHeader(uint8_t *buf) const {
196   if (config->isPic) {
197     const uint8_t v[] = {
198         0xff, 0xb3, 0x04, 0x00, 0x00, 0x00, // pushl 4(%ebx)
199         0xff, 0xa3, 0x08, 0x00, 0x00, 0x00, // jmp *8(%ebx)
200         0x90, 0x90, 0x90, 0x90              // nop
201     };
202     memcpy(buf, v, sizeof(v));
203     return;
204   }
205 
206   const uint8_t pltData[] = {
207       0xff, 0x35, 0, 0, 0, 0, // pushl (GOTPLT+4)
208       0xff, 0x25, 0, 0, 0, 0, // jmp *(GOTPLT+8)
209       0x90, 0x90, 0x90, 0x90, // nop
210   };
211   memcpy(buf, pltData, sizeof(pltData));
212   uint32_t gotPlt = in.gotPlt->getVA();
213   write32le(buf + 2, gotPlt + 4);
214   write32le(buf + 8, gotPlt + 8);
215 }
216 
writePlt(uint8_t * buf,const Symbol & sym,uint64_t pltEntryAddr) const217 void X86::writePlt(uint8_t *buf, const Symbol &sym,
218                    uint64_t pltEntryAddr) const {
219   unsigned relOff = in.relaPlt->entsize * sym.pltIndex;
220   if (config->isPic) {
221     const uint8_t inst[] = {
222         0xff, 0xa3, 0, 0, 0, 0, // jmp *foo@GOT(%ebx)
223         0x68, 0,    0, 0, 0,    // pushl $reloc_offset
224         0xe9, 0,    0, 0, 0,    // jmp .PLT0@PC
225     };
226     memcpy(buf, inst, sizeof(inst));
227     write32le(buf + 2, sym.getGotPltVA() - in.gotPlt->getVA());
228   } else {
229     const uint8_t inst[] = {
230         0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT
231         0x68, 0,    0, 0, 0,    // pushl $reloc_offset
232         0xe9, 0,    0, 0, 0,    // jmp .PLT0@PC
233     };
234     memcpy(buf, inst, sizeof(inst));
235     write32le(buf + 2, sym.getGotPltVA());
236   }
237 
238   write32le(buf + 7, relOff);
239   write32le(buf + 12, in.plt->getVA() - pltEntryAddr - 16);
240 }
241 
getImplicitAddend(const uint8_t * buf,RelType type) const242 int64_t X86::getImplicitAddend(const uint8_t *buf, RelType type) const {
243   switch (type) {
244   case R_386_8:
245   case R_386_PC8:
246     return SignExtend64<8>(*buf);
247   case R_386_16:
248   case R_386_PC16:
249     return SignExtend64<16>(read16le(buf));
250   case R_386_32:
251   case R_386_GOT32:
252   case R_386_GOT32X:
253   case R_386_GOTOFF:
254   case R_386_GOTPC:
255   case R_386_PC32:
256   case R_386_PLT32:
257   case R_386_TLS_LDO_32:
258   case R_386_TLS_LE:
259     return SignExtend64<32>(read32le(buf));
260   default:
261     return 0;
262   }
263 }
264 
relocateOne(uint8_t * loc,RelType type,uint64_t val) const265 void X86::relocateOne(uint8_t *loc, RelType type, uint64_t val) const {
266   switch (type) {
267   case R_386_8:
268     // R_386_{PC,}{8,16} are not part of the i386 psABI, but they are
269     // being used for some 16-bit programs such as boot loaders, so
270     // we want to support them.
271     checkIntUInt(loc, val, 8, type);
272     *loc = val;
273     break;
274   case R_386_PC8:
275     checkInt(loc, val, 8, type);
276     *loc = val;
277     break;
278   case R_386_16:
279     checkIntUInt(loc, val, 16, type);
280     write16le(loc, val);
281     break;
282   case R_386_PC16:
283     // R_386_PC16 is normally used with 16 bit code. In that situation
284     // the PC is 16 bits, just like the addend. This means that it can
285     // point from any 16 bit address to any other if the possibility
286     // of wrapping is included.
287     // The only restriction we have to check then is that the destination
288     // address fits in 16 bits. That is impossible to do here. The problem is
289     // that we are passed the final value, which already had the
290     // current location subtracted from it.
291     // We just check that Val fits in 17 bits. This misses some cases, but
292     // should have no false positives.
293     checkInt(loc, val, 17, type);
294     write16le(loc, val);
295     break;
296   case R_386_32:
297   case R_386_GOT32:
298   case R_386_GOT32X:
299   case R_386_GOTOFF:
300   case R_386_GOTPC:
301   case R_386_PC32:
302   case R_386_PLT32:
303   case R_386_RELATIVE:
304   case R_386_TLS_DTPMOD32:
305   case R_386_TLS_DTPOFF32:
306   case R_386_TLS_GD:
307   case R_386_TLS_GOTIE:
308   case R_386_TLS_IE:
309   case R_386_TLS_LDM:
310   case R_386_TLS_LDO_32:
311   case R_386_TLS_LE:
312   case R_386_TLS_LE_32:
313   case R_386_TLS_TPOFF:
314   case R_386_TLS_TPOFF32:
315     checkInt(loc, val, 32, type);
316     write32le(loc, val);
317     break;
318   default:
319     llvm_unreachable("unknown relocation");
320   }
321 }
322 
relaxTlsGdToLe(uint8_t * loc,RelType type,uint64_t val) const323 void X86::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const {
324   // Convert
325   //   leal x@tlsgd(, %ebx, 1),
326   //   call __tls_get_addr@plt
327   // to
328   //   movl %gs:0,%eax
329   //   subl $x@ntpoff,%eax
330   const uint8_t inst[] = {
331       0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
332       0x81, 0xe8, 0, 0, 0, 0,             // subl Val(%ebx), %eax
333   };
334   memcpy(loc - 3, inst, sizeof(inst));
335   write32le(loc + 5, val);
336 }
337 
relaxTlsGdToIe(uint8_t * loc,RelType type,uint64_t val) const338 void X86::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const {
339   // Convert
340   //   leal x@tlsgd(, %ebx, 1),
341   //   call __tls_get_addr@plt
342   // to
343   //   movl %gs:0, %eax
344   //   addl x@gotntpoff(%ebx), %eax
345   const uint8_t inst[] = {
346       0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
347       0x03, 0x83, 0, 0, 0, 0,             // addl Val(%ebx), %eax
348   };
349   memcpy(loc - 3, inst, sizeof(inst));
350   write32le(loc + 5, val);
351 }
352 
353 // In some conditions, relocations can be optimized to avoid using GOT.
354 // This function does that for Initial Exec to Local Exec case.
relaxTlsIeToLe(uint8_t * loc,RelType type,uint64_t val) const355 void X86::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const {
356   // Ulrich's document section 6.2 says that @gotntpoff can
357   // be used with MOVL or ADDL instructions.
358   // @indntpoff is similar to @gotntpoff, but for use in
359   // position dependent code.
360   uint8_t reg = (loc[-1] >> 3) & 7;
361 
362   if (type == R_386_TLS_IE) {
363     if (loc[-1] == 0xa1) {
364       // "movl foo@indntpoff,%eax" -> "movl $foo,%eax"
365       // This case is different from the generic case below because
366       // this is a 5 byte instruction while below is 6 bytes.
367       loc[-1] = 0xb8;
368     } else if (loc[-2] == 0x8b) {
369       // "movl foo@indntpoff,%reg" -> "movl $foo,%reg"
370       loc[-2] = 0xc7;
371       loc[-1] = 0xc0 | reg;
372     } else {
373       // "addl foo@indntpoff,%reg" -> "addl $foo,%reg"
374       loc[-2] = 0x81;
375       loc[-1] = 0xc0 | reg;
376     }
377   } else {
378     assert(type == R_386_TLS_GOTIE);
379     if (loc[-2] == 0x8b) {
380       // "movl foo@gottpoff(%rip),%reg" -> "movl $foo,%reg"
381       loc[-2] = 0xc7;
382       loc[-1] = 0xc0 | reg;
383     } else {
384       // "addl foo@gotntpoff(%rip),%reg" -> "leal foo(%reg),%reg"
385       loc[-2] = 0x8d;
386       loc[-1] = 0x80 | (reg << 3) | reg;
387     }
388   }
389   write32le(loc, val);
390 }
391 
relaxTlsLdToLe(uint8_t * loc,RelType type,uint64_t val) const392 void X86::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const {
393   if (type == R_386_TLS_LDO_32) {
394     write32le(loc, val);
395     return;
396   }
397 
398   // Convert
399   //   leal foo(%reg),%eax
400   //   call ___tls_get_addr
401   // to
402   //   movl %gs:0,%eax
403   //   nop
404   //   leal 0(%esi,1),%esi
405   const uint8_t inst[] = {
406       0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
407       0x90,                               // nop
408       0x8d, 0x74, 0x26, 0x00,             // leal 0(%esi,1),%esi
409   };
410   memcpy(loc - 2, inst, sizeof(inst));
411 }
412 
413 // If Intel Indirect Branch Tracking is enabled, we have to emit special PLT
414 // entries containing endbr32 instructions. A PLT entry will be split into two
415 // parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt).
416 namespace {
417 class IntelIBT : public X86 {
418 public:
419   IntelIBT();
420   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
421   void writePlt(uint8_t *buf, const Symbol &sym,
422                 uint64_t pltEntryAddr) const override;
423   void writeIBTPlt(uint8_t *buf, size_t numEntries) const override;
424 
425   static const unsigned IBTPltHeaderSize = 16;
426 };
427 } // namespace
428 
IntelIBT()429 IntelIBT::IntelIBT() { pltHeaderSize = 0; }
430 
writeGotPlt(uint8_t * buf,const Symbol & s) const431 void IntelIBT::writeGotPlt(uint8_t *buf, const Symbol &s) const {
432   uint64_t va =
433       in.ibtPlt->getVA() + IBTPltHeaderSize + s.pltIndex * pltEntrySize;
434   write32le(buf, va);
435 }
436 
writePlt(uint8_t * buf,const Symbol & sym,uint64_t) const437 void IntelIBT::writePlt(uint8_t *buf, const Symbol &sym,
438                         uint64_t /*pltEntryAddr*/) const {
439   if (config->isPic) {
440     const uint8_t inst[] = {
441         0xf3, 0x0f, 0x1e, 0xfb,       // endbr32
442         0xff, 0xa3, 0,    0,    0, 0, // jmp *name@GOT(%ebx)
443         0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
444     };
445     memcpy(buf, inst, sizeof(inst));
446     write32le(buf + 6, sym.getGotPltVA() - in.gotPlt->getVA());
447     return;
448   }
449 
450   const uint8_t inst[] = {
451       0xf3, 0x0f, 0x1e, 0xfb,       // endbr32
452       0xff, 0x25, 0,    0,    0, 0, // jmp *foo@GOT
453       0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
454   };
455   memcpy(buf, inst, sizeof(inst));
456   write32le(buf + 6, sym.getGotPltVA());
457 }
458 
writeIBTPlt(uint8_t * buf,size_t numEntries) const459 void IntelIBT::writeIBTPlt(uint8_t *buf, size_t numEntries) const {
460   writePltHeader(buf);
461   buf += IBTPltHeaderSize;
462 
463   const uint8_t inst[] = {
464       0xf3, 0x0f, 0x1e, 0xfb,    // endbr32
465       0x68, 0,    0,    0,    0, // pushl $reloc_offset
466       0xe9, 0,    0,    0,    0, // jmpq .PLT0@PC
467       0x66, 0x90,                // nop
468   };
469 
470   for (size_t i = 0; i < numEntries; ++i) {
471     memcpy(buf, inst, sizeof(inst));
472     write32le(buf + 5, i * sizeof(object::ELF32LE::Rel));
473     write32le(buf + 10, -pltHeaderSize - sizeof(inst) * i - 30);
474     buf += sizeof(inst);
475   }
476 }
477 
478 namespace {
479 class RetpolinePic : public X86 {
480 public:
481   RetpolinePic();
482   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
483   void writePltHeader(uint8_t *buf) const override;
484   void writePlt(uint8_t *buf, const Symbol &sym,
485                 uint64_t pltEntryAddr) const override;
486 };
487 
488 class RetpolineNoPic : public X86 {
489 public:
490   RetpolineNoPic();
491   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
492   void writePltHeader(uint8_t *buf) const override;
493   void writePlt(uint8_t *buf, const Symbol &sym,
494                 uint64_t pltEntryAddr) const override;
495 };
496 } // namespace
497 
RetpolinePic()498 RetpolinePic::RetpolinePic() {
499   pltHeaderSize = 48;
500   pltEntrySize = 32;
501   ipltEntrySize = 32;
502 }
503 
writeGotPlt(uint8_t * buf,const Symbol & s) const504 void RetpolinePic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
505   write32le(buf, s.getPltVA() + 17);
506 }
507 
writePltHeader(uint8_t * buf) const508 void RetpolinePic::writePltHeader(uint8_t *buf) const {
509   const uint8_t insn[] = {
510       0xff, 0xb3, 4,    0,    0,    0,          // 0:    pushl 4(%ebx)
511       0x50,                                     // 6:    pushl %eax
512       0x8b, 0x83, 8,    0,    0,    0,          // 7:    mov 8(%ebx), %eax
513       0xe8, 0x0e, 0x00, 0x00, 0x00,             // d:    call next
514       0xf3, 0x90,                               // 12: loop: pause
515       0x0f, 0xae, 0xe8,                         // 14:   lfence
516       0xeb, 0xf9,                               // 17:   jmp loop
517       0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19:   int3; .align 16
518       0x89, 0x0c, 0x24,                         // 20: next: mov %ecx, (%esp)
519       0x8b, 0x4c, 0x24, 0x04,                   // 23:   mov 0x4(%esp), %ecx
520       0x89, 0x44, 0x24, 0x04,                   // 27:   mov %eax ,0x4(%esp)
521       0x89, 0xc8,                               // 2b:   mov %ecx, %eax
522       0x59,                                     // 2d:   pop %ecx
523       0xc3,                                     // 2e:   ret
524       0xcc,                                     // 2f:   int3; padding
525   };
526   memcpy(buf, insn, sizeof(insn));
527 }
528 
writePlt(uint8_t * buf,const Symbol & sym,uint64_t pltEntryAddr) const529 void RetpolinePic::writePlt(uint8_t *buf, const Symbol &sym,
530                             uint64_t pltEntryAddr) const {
531   unsigned relOff = in.relaPlt->entsize * sym.pltIndex;
532   const uint8_t insn[] = {
533       0x50,                            // pushl %eax
534       0x8b, 0x83, 0,    0,    0,    0, // mov foo@GOT(%ebx), %eax
535       0xe8, 0,    0,    0,    0,       // call plt+0x20
536       0xe9, 0,    0,    0,    0,       // jmp plt+0x12
537       0x68, 0,    0,    0,    0,       // pushl $reloc_offset
538       0xe9, 0,    0,    0,    0,       // jmp plt+0
539       0xcc, 0xcc, 0xcc, 0xcc, 0xcc,    // int3; padding
540   };
541   memcpy(buf, insn, sizeof(insn));
542 
543   uint32_t ebx = in.gotPlt->getVA();
544   unsigned off = pltEntryAddr - in.plt->getVA();
545   write32le(buf + 3, sym.getGotPltVA() - ebx);
546   write32le(buf + 8, -off - 12 + 32);
547   write32le(buf + 13, -off - 17 + 18);
548   write32le(buf + 18, relOff);
549   write32le(buf + 23, -off - 27);
550 }
551 
RetpolineNoPic()552 RetpolineNoPic::RetpolineNoPic() {
553   pltHeaderSize = 48;
554   pltEntrySize = 32;
555   ipltEntrySize = 32;
556 }
557 
writeGotPlt(uint8_t * buf,const Symbol & s) const558 void RetpolineNoPic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
559   write32le(buf, s.getPltVA() + 16);
560 }
561 
writePltHeader(uint8_t * buf) const562 void RetpolineNoPic::writePltHeader(uint8_t *buf) const {
563   const uint8_t insn[] = {
564       0xff, 0x35, 0,    0,    0,    0, // 0:    pushl GOTPLT+4
565       0x50,                            // 6:    pushl %eax
566       0xa1, 0,    0,    0,    0,       // 7:    mov GOTPLT+8, %eax
567       0xe8, 0x0f, 0x00, 0x00, 0x00,    // c:    call next
568       0xf3, 0x90,                      // 11: loop: pause
569       0x0f, 0xae, 0xe8,                // 13:   lfence
570       0xeb, 0xf9,                      // 16:   jmp loop
571       0xcc, 0xcc, 0xcc, 0xcc, 0xcc,    // 18:   int3
572       0xcc, 0xcc, 0xcc,                // 1f:   int3; .align 16
573       0x89, 0x0c, 0x24,                // 20: next: mov %ecx, (%esp)
574       0x8b, 0x4c, 0x24, 0x04,          // 23:   mov 0x4(%esp), %ecx
575       0x89, 0x44, 0x24, 0x04,          // 27:   mov %eax ,0x4(%esp)
576       0x89, 0xc8,                      // 2b:   mov %ecx, %eax
577       0x59,                            // 2d:   pop %ecx
578       0xc3,                            // 2e:   ret
579       0xcc,                            // 2f:   int3; padding
580   };
581   memcpy(buf, insn, sizeof(insn));
582 
583   uint32_t gotPlt = in.gotPlt->getVA();
584   write32le(buf + 2, gotPlt + 4);
585   write32le(buf + 8, gotPlt + 8);
586 }
587 
writePlt(uint8_t * buf,const Symbol & sym,uint64_t pltEntryAddr) const588 void RetpolineNoPic::writePlt(uint8_t *buf, const Symbol &sym,
589                               uint64_t pltEntryAddr) const {
590   unsigned relOff = in.relaPlt->entsize * sym.pltIndex;
591   const uint8_t insn[] = {
592       0x50,                         // 0:  pushl %eax
593       0xa1, 0,    0,    0,    0,    // 1:  mov foo_in_GOT, %eax
594       0xe8, 0,    0,    0,    0,    // 6:  call plt+0x20
595       0xe9, 0,    0,    0,    0,    // b:  jmp plt+0x11
596       0x68, 0,    0,    0,    0,    // 10: pushl $reloc_offset
597       0xe9, 0,    0,    0,    0,    // 15: jmp plt+0
598       0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1a: int3; padding
599       0xcc,                         // 1f: int3; padding
600   };
601   memcpy(buf, insn, sizeof(insn));
602 
603   unsigned off = pltEntryAddr - in.plt->getVA();
604   write32le(buf + 2, sym.getGotPltVA());
605   write32le(buf + 7, -off - 11 + 32);
606   write32le(buf + 12, -off - 16 + 17);
607   write32le(buf + 17, relOff);
608   write32le(buf + 22, -off - 26);
609 }
610 
getX86TargetInfo()611 TargetInfo *getX86TargetInfo() {
612   if (config->zRetpolineplt) {
613     if (config->isPic) {
614       static RetpolinePic t;
615       return &t;
616     }
617     static RetpolineNoPic t;
618     return &t;
619   }
620 
621   if (config->andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT) {
622     static IntelIBT t;
623     return &t;
624   }
625 
626   static X86 t;
627   return &t;
628 }
629 
630 } // namespace elf
631 } // namespace lld
632