1 //===- X86.cpp ------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "InputFiles.h"
10 #include "Symbols.h"
11 #include "SyntheticSections.h"
12 #include "Target.h"
13 #include "lld/Common/ErrorHandler.h"
14 #include "llvm/Support/Endian.h"
15
16 using namespace llvm;
17 using namespace llvm::support::endian;
18 using namespace llvm::ELF;
19
20 namespace lld {
21 namespace elf {
22
23 namespace {
24 class X86 : public TargetInfo {
25 public:
26 X86();
27 int getTlsGdRelaxSkip(RelType type) const override;
28 RelExpr getRelExpr(RelType type, const Symbol &s,
29 const uint8_t *loc) const override;
30 int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
31 void writeGotPltHeader(uint8_t *buf) const override;
32 RelType getDynRel(RelType type) const override;
33 void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
34 void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
35 void writePltHeader(uint8_t *buf) const override;
36 void writePlt(uint8_t *buf, const Symbol &sym,
37 uint64_t pltEntryAddr) const override;
38 void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override;
39
40 RelExpr adjustRelaxExpr(RelType type, const uint8_t *data,
41 RelExpr expr) const override;
42 void relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const override;
43 void relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const override;
44 void relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const override;
45 void relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const override;
46 };
47 } // namespace
48
X86()49 X86::X86() {
50 copyRel = R_386_COPY;
51 gotRel = R_386_GLOB_DAT;
52 noneRel = R_386_NONE;
53 pltRel = R_386_JUMP_SLOT;
54 iRelativeRel = R_386_IRELATIVE;
55 relativeRel = R_386_RELATIVE;
56 symbolicRel = R_386_32;
57 tlsGotRel = R_386_TLS_TPOFF;
58 tlsModuleIndexRel = R_386_TLS_DTPMOD32;
59 tlsOffsetRel = R_386_TLS_DTPOFF32;
60 pltHeaderSize = 16;
61 pltEntrySize = 16;
62 ipltEntrySize = 16;
63 trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3
64
65 // Align to the non-PAE large page size (known as a superpage or huge page).
66 // FreeBSD automatically promotes large, superpage-aligned allocations.
67 defaultImageBase = 0x400000;
68 }
69
getTlsGdRelaxSkip(RelType type) const70 int X86::getTlsGdRelaxSkip(RelType type) const {
71 return 2;
72 }
73
getRelExpr(RelType type,const Symbol & s,const uint8_t * loc) const74 RelExpr X86::getRelExpr(RelType type, const Symbol &s,
75 const uint8_t *loc) const {
76 // There are 4 different TLS variable models with varying degrees of
77 // flexibility and performance. LocalExec and InitialExec models are fast but
78 // less-flexible models. If they are in use, we set DF_STATIC_TLS flag in the
79 // dynamic section to let runtime know about that.
80 if (type == R_386_TLS_LE || type == R_386_TLS_LE_32 || type == R_386_TLS_IE ||
81 type == R_386_TLS_GOTIE)
82 config->hasStaticTlsModel = true;
83
84 switch (type) {
85 case R_386_8:
86 case R_386_16:
87 case R_386_32:
88 return R_ABS;
89 case R_386_TLS_LDO_32:
90 return R_DTPREL;
91 case R_386_TLS_GD:
92 return R_TLSGD_GOTPLT;
93 case R_386_TLS_LDM:
94 return R_TLSLD_GOTPLT;
95 case R_386_PLT32:
96 return R_PLT_PC;
97 case R_386_PC8:
98 case R_386_PC16:
99 case R_386_PC32:
100 return R_PC;
101 case R_386_GOTPC:
102 return R_GOTPLTONLY_PC;
103 case R_386_TLS_IE:
104 return R_GOT;
105 case R_386_GOT32:
106 case R_386_GOT32X:
107 // These relocations are arguably mis-designed because their calculations
108 // depend on the instructions they are applied to. This is bad because we
109 // usually don't care about whether the target section contains valid
110 // machine instructions or not. But this is part of the documented ABI, so
111 // we had to implement as the standard requires.
112 //
113 // x86 does not support PC-relative data access. Therefore, in order to
114 // access GOT contents, a GOT address needs to be known at link-time
115 // (which means non-PIC) or compilers have to emit code to get a GOT
116 // address at runtime (which means code is position-independent but
117 // compilers need to emit extra code for each GOT access.) This decision
118 // is made at compile-time. In the latter case, compilers emit code to
119 // load a GOT address to a register, which is usually %ebx.
120 //
121 // So, there are two ways to refer to symbol foo's GOT entry: foo@GOT or
122 // foo@GOT(%ebx).
123 //
124 // foo@GOT is not usable in PIC. If we are creating a PIC output and if we
125 // find such relocation, we should report an error. foo@GOT is resolved to
126 // an *absolute* address of foo's GOT entry, because both GOT address and
127 // foo's offset are known. In other words, it's G + A.
128 //
129 // foo@GOT(%ebx) needs to be resolved to a *relative* offset from a GOT to
130 // foo's GOT entry in the table, because GOT address is not known but foo's
131 // offset in the table is known. It's G + A - GOT.
132 //
133 // It's unfortunate that compilers emit the same relocation for these
134 // different use cases. In order to distinguish them, we have to read a
135 // machine instruction.
136 //
137 // The following code implements it. We assume that Loc[0] is the first byte
138 // of a displacement or an immediate field of a valid machine
139 // instruction. That means a ModRM byte is at Loc[-1]. By taking a look at
140 // the byte, we can determine whether the instruction uses the operand as an
141 // absolute address (R_GOT) or a register-relative address (R_GOTPLT).
142 return (loc[-1] & 0xc7) == 0x5 ? R_GOT : R_GOTPLT;
143 case R_386_TLS_GOTIE:
144 return R_GOTPLT;
145 case R_386_GOTOFF:
146 return R_GOTPLTREL;
147 case R_386_TLS_LE:
148 return R_TLS;
149 case R_386_TLS_LE_32:
150 return R_NEG_TLS;
151 case R_386_NONE:
152 return R_NONE;
153 default:
154 error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
155 ") against symbol " + toString(s));
156 return R_NONE;
157 }
158 }
159
adjustRelaxExpr(RelType type,const uint8_t * data,RelExpr expr) const160 RelExpr X86::adjustRelaxExpr(RelType type, const uint8_t *data,
161 RelExpr expr) const {
162 switch (expr) {
163 default:
164 return expr;
165 case R_RELAX_TLS_GD_TO_IE:
166 return R_RELAX_TLS_GD_TO_IE_GOTPLT;
167 case R_RELAX_TLS_GD_TO_LE:
168 return R_RELAX_TLS_GD_TO_LE_NEG;
169 }
170 }
171
writeGotPltHeader(uint8_t * buf) const172 void X86::writeGotPltHeader(uint8_t *buf) const {
173 write32le(buf, mainPart->dynamic->getVA());
174 }
175
writeGotPlt(uint8_t * buf,const Symbol & s) const176 void X86::writeGotPlt(uint8_t *buf, const Symbol &s) const {
177 // Entries in .got.plt initially points back to the corresponding
178 // PLT entries with a fixed offset to skip the first instruction.
179 write32le(buf, s.getPltVA() + 6);
180 }
181
writeIgotPlt(uint8_t * buf,const Symbol & s) const182 void X86::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
183 // An x86 entry is the address of the ifunc resolver function.
184 write32le(buf, s.getVA());
185 }
186
getDynRel(RelType type) const187 RelType X86::getDynRel(RelType type) const {
188 if (type == R_386_TLS_LE)
189 return R_386_TLS_TPOFF;
190 if (type == R_386_TLS_LE_32)
191 return R_386_TLS_TPOFF32;
192 return type;
193 }
194
writePltHeader(uint8_t * buf) const195 void X86::writePltHeader(uint8_t *buf) const {
196 if (config->isPic) {
197 const uint8_t v[] = {
198 0xff, 0xb3, 0x04, 0x00, 0x00, 0x00, // pushl 4(%ebx)
199 0xff, 0xa3, 0x08, 0x00, 0x00, 0x00, // jmp *8(%ebx)
200 0x90, 0x90, 0x90, 0x90 // nop
201 };
202 memcpy(buf, v, sizeof(v));
203 return;
204 }
205
206 const uint8_t pltData[] = {
207 0xff, 0x35, 0, 0, 0, 0, // pushl (GOTPLT+4)
208 0xff, 0x25, 0, 0, 0, 0, // jmp *(GOTPLT+8)
209 0x90, 0x90, 0x90, 0x90, // nop
210 };
211 memcpy(buf, pltData, sizeof(pltData));
212 uint32_t gotPlt = in.gotPlt->getVA();
213 write32le(buf + 2, gotPlt + 4);
214 write32le(buf + 8, gotPlt + 8);
215 }
216
writePlt(uint8_t * buf,const Symbol & sym,uint64_t pltEntryAddr) const217 void X86::writePlt(uint8_t *buf, const Symbol &sym,
218 uint64_t pltEntryAddr) const {
219 unsigned relOff = in.relaPlt->entsize * sym.pltIndex;
220 if (config->isPic) {
221 const uint8_t inst[] = {
222 0xff, 0xa3, 0, 0, 0, 0, // jmp *foo@GOT(%ebx)
223 0x68, 0, 0, 0, 0, // pushl $reloc_offset
224 0xe9, 0, 0, 0, 0, // jmp .PLT0@PC
225 };
226 memcpy(buf, inst, sizeof(inst));
227 write32le(buf + 2, sym.getGotPltVA() - in.gotPlt->getVA());
228 } else {
229 const uint8_t inst[] = {
230 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT
231 0x68, 0, 0, 0, 0, // pushl $reloc_offset
232 0xe9, 0, 0, 0, 0, // jmp .PLT0@PC
233 };
234 memcpy(buf, inst, sizeof(inst));
235 write32le(buf + 2, sym.getGotPltVA());
236 }
237
238 write32le(buf + 7, relOff);
239 write32le(buf + 12, in.plt->getVA() - pltEntryAddr - 16);
240 }
241
getImplicitAddend(const uint8_t * buf,RelType type) const242 int64_t X86::getImplicitAddend(const uint8_t *buf, RelType type) const {
243 switch (type) {
244 case R_386_8:
245 case R_386_PC8:
246 return SignExtend64<8>(*buf);
247 case R_386_16:
248 case R_386_PC16:
249 return SignExtend64<16>(read16le(buf));
250 case R_386_32:
251 case R_386_GOT32:
252 case R_386_GOT32X:
253 case R_386_GOTOFF:
254 case R_386_GOTPC:
255 case R_386_PC32:
256 case R_386_PLT32:
257 case R_386_TLS_LDO_32:
258 case R_386_TLS_LE:
259 return SignExtend64<32>(read32le(buf));
260 default:
261 return 0;
262 }
263 }
264
relocateOne(uint8_t * loc,RelType type,uint64_t val) const265 void X86::relocateOne(uint8_t *loc, RelType type, uint64_t val) const {
266 switch (type) {
267 case R_386_8:
268 // R_386_{PC,}{8,16} are not part of the i386 psABI, but they are
269 // being used for some 16-bit programs such as boot loaders, so
270 // we want to support them.
271 checkIntUInt(loc, val, 8, type);
272 *loc = val;
273 break;
274 case R_386_PC8:
275 checkInt(loc, val, 8, type);
276 *loc = val;
277 break;
278 case R_386_16:
279 checkIntUInt(loc, val, 16, type);
280 write16le(loc, val);
281 break;
282 case R_386_PC16:
283 // R_386_PC16 is normally used with 16 bit code. In that situation
284 // the PC is 16 bits, just like the addend. This means that it can
285 // point from any 16 bit address to any other if the possibility
286 // of wrapping is included.
287 // The only restriction we have to check then is that the destination
288 // address fits in 16 bits. That is impossible to do here. The problem is
289 // that we are passed the final value, which already had the
290 // current location subtracted from it.
291 // We just check that Val fits in 17 bits. This misses some cases, but
292 // should have no false positives.
293 checkInt(loc, val, 17, type);
294 write16le(loc, val);
295 break;
296 case R_386_32:
297 case R_386_GOT32:
298 case R_386_GOT32X:
299 case R_386_GOTOFF:
300 case R_386_GOTPC:
301 case R_386_PC32:
302 case R_386_PLT32:
303 case R_386_RELATIVE:
304 case R_386_TLS_DTPMOD32:
305 case R_386_TLS_DTPOFF32:
306 case R_386_TLS_GD:
307 case R_386_TLS_GOTIE:
308 case R_386_TLS_IE:
309 case R_386_TLS_LDM:
310 case R_386_TLS_LDO_32:
311 case R_386_TLS_LE:
312 case R_386_TLS_LE_32:
313 case R_386_TLS_TPOFF:
314 case R_386_TLS_TPOFF32:
315 checkInt(loc, val, 32, type);
316 write32le(loc, val);
317 break;
318 default:
319 llvm_unreachable("unknown relocation");
320 }
321 }
322
relaxTlsGdToLe(uint8_t * loc,RelType type,uint64_t val) const323 void X86::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const {
324 // Convert
325 // leal x@tlsgd(, %ebx, 1),
326 // call __tls_get_addr@plt
327 // to
328 // movl %gs:0,%eax
329 // subl $x@ntpoff,%eax
330 const uint8_t inst[] = {
331 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
332 0x81, 0xe8, 0, 0, 0, 0, // subl Val(%ebx), %eax
333 };
334 memcpy(loc - 3, inst, sizeof(inst));
335 write32le(loc + 5, val);
336 }
337
relaxTlsGdToIe(uint8_t * loc,RelType type,uint64_t val) const338 void X86::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const {
339 // Convert
340 // leal x@tlsgd(, %ebx, 1),
341 // call __tls_get_addr@plt
342 // to
343 // movl %gs:0, %eax
344 // addl x@gotntpoff(%ebx), %eax
345 const uint8_t inst[] = {
346 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
347 0x03, 0x83, 0, 0, 0, 0, // addl Val(%ebx), %eax
348 };
349 memcpy(loc - 3, inst, sizeof(inst));
350 write32le(loc + 5, val);
351 }
352
353 // In some conditions, relocations can be optimized to avoid using GOT.
354 // This function does that for Initial Exec to Local Exec case.
relaxTlsIeToLe(uint8_t * loc,RelType type,uint64_t val) const355 void X86::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const {
356 // Ulrich's document section 6.2 says that @gotntpoff can
357 // be used with MOVL or ADDL instructions.
358 // @indntpoff is similar to @gotntpoff, but for use in
359 // position dependent code.
360 uint8_t reg = (loc[-1] >> 3) & 7;
361
362 if (type == R_386_TLS_IE) {
363 if (loc[-1] == 0xa1) {
364 // "movl foo@indntpoff,%eax" -> "movl $foo,%eax"
365 // This case is different from the generic case below because
366 // this is a 5 byte instruction while below is 6 bytes.
367 loc[-1] = 0xb8;
368 } else if (loc[-2] == 0x8b) {
369 // "movl foo@indntpoff,%reg" -> "movl $foo,%reg"
370 loc[-2] = 0xc7;
371 loc[-1] = 0xc0 | reg;
372 } else {
373 // "addl foo@indntpoff,%reg" -> "addl $foo,%reg"
374 loc[-2] = 0x81;
375 loc[-1] = 0xc0 | reg;
376 }
377 } else {
378 assert(type == R_386_TLS_GOTIE);
379 if (loc[-2] == 0x8b) {
380 // "movl foo@gottpoff(%rip),%reg" -> "movl $foo,%reg"
381 loc[-2] = 0xc7;
382 loc[-1] = 0xc0 | reg;
383 } else {
384 // "addl foo@gotntpoff(%rip),%reg" -> "leal foo(%reg),%reg"
385 loc[-2] = 0x8d;
386 loc[-1] = 0x80 | (reg << 3) | reg;
387 }
388 }
389 write32le(loc, val);
390 }
391
relaxTlsLdToLe(uint8_t * loc,RelType type,uint64_t val) const392 void X86::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const {
393 if (type == R_386_TLS_LDO_32) {
394 write32le(loc, val);
395 return;
396 }
397
398 // Convert
399 // leal foo(%reg),%eax
400 // call ___tls_get_addr
401 // to
402 // movl %gs:0,%eax
403 // nop
404 // leal 0(%esi,1),%esi
405 const uint8_t inst[] = {
406 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
407 0x90, // nop
408 0x8d, 0x74, 0x26, 0x00, // leal 0(%esi,1),%esi
409 };
410 memcpy(loc - 2, inst, sizeof(inst));
411 }
412
413 // If Intel Indirect Branch Tracking is enabled, we have to emit special PLT
414 // entries containing endbr32 instructions. A PLT entry will be split into two
415 // parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt).
416 namespace {
417 class IntelIBT : public X86 {
418 public:
419 IntelIBT();
420 void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
421 void writePlt(uint8_t *buf, const Symbol &sym,
422 uint64_t pltEntryAddr) const override;
423 void writeIBTPlt(uint8_t *buf, size_t numEntries) const override;
424
425 static const unsigned IBTPltHeaderSize = 16;
426 };
427 } // namespace
428
IntelIBT()429 IntelIBT::IntelIBT() { pltHeaderSize = 0; }
430
writeGotPlt(uint8_t * buf,const Symbol & s) const431 void IntelIBT::writeGotPlt(uint8_t *buf, const Symbol &s) const {
432 uint64_t va =
433 in.ibtPlt->getVA() + IBTPltHeaderSize + s.pltIndex * pltEntrySize;
434 write32le(buf, va);
435 }
436
writePlt(uint8_t * buf,const Symbol & sym,uint64_t) const437 void IntelIBT::writePlt(uint8_t *buf, const Symbol &sym,
438 uint64_t /*pltEntryAddr*/) const {
439 if (config->isPic) {
440 const uint8_t inst[] = {
441 0xf3, 0x0f, 0x1e, 0xfb, // endbr32
442 0xff, 0xa3, 0, 0, 0, 0, // jmp *name@GOT(%ebx)
443 0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
444 };
445 memcpy(buf, inst, sizeof(inst));
446 write32le(buf + 6, sym.getGotPltVA() - in.gotPlt->getVA());
447 return;
448 }
449
450 const uint8_t inst[] = {
451 0xf3, 0x0f, 0x1e, 0xfb, // endbr32
452 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT
453 0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
454 };
455 memcpy(buf, inst, sizeof(inst));
456 write32le(buf + 6, sym.getGotPltVA());
457 }
458
writeIBTPlt(uint8_t * buf,size_t numEntries) const459 void IntelIBT::writeIBTPlt(uint8_t *buf, size_t numEntries) const {
460 writePltHeader(buf);
461 buf += IBTPltHeaderSize;
462
463 const uint8_t inst[] = {
464 0xf3, 0x0f, 0x1e, 0xfb, // endbr32
465 0x68, 0, 0, 0, 0, // pushl $reloc_offset
466 0xe9, 0, 0, 0, 0, // jmpq .PLT0@PC
467 0x66, 0x90, // nop
468 };
469
470 for (size_t i = 0; i < numEntries; ++i) {
471 memcpy(buf, inst, sizeof(inst));
472 write32le(buf + 5, i * sizeof(object::ELF32LE::Rel));
473 write32le(buf + 10, -pltHeaderSize - sizeof(inst) * i - 30);
474 buf += sizeof(inst);
475 }
476 }
477
478 namespace {
479 class RetpolinePic : public X86 {
480 public:
481 RetpolinePic();
482 void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
483 void writePltHeader(uint8_t *buf) const override;
484 void writePlt(uint8_t *buf, const Symbol &sym,
485 uint64_t pltEntryAddr) const override;
486 };
487
488 class RetpolineNoPic : public X86 {
489 public:
490 RetpolineNoPic();
491 void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
492 void writePltHeader(uint8_t *buf) const override;
493 void writePlt(uint8_t *buf, const Symbol &sym,
494 uint64_t pltEntryAddr) const override;
495 };
496 } // namespace
497
RetpolinePic()498 RetpolinePic::RetpolinePic() {
499 pltHeaderSize = 48;
500 pltEntrySize = 32;
501 ipltEntrySize = 32;
502 }
503
writeGotPlt(uint8_t * buf,const Symbol & s) const504 void RetpolinePic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
505 write32le(buf, s.getPltVA() + 17);
506 }
507
writePltHeader(uint8_t * buf) const508 void RetpolinePic::writePltHeader(uint8_t *buf) const {
509 const uint8_t insn[] = {
510 0xff, 0xb3, 4, 0, 0, 0, // 0: pushl 4(%ebx)
511 0x50, // 6: pushl %eax
512 0x8b, 0x83, 8, 0, 0, 0, // 7: mov 8(%ebx), %eax
513 0xe8, 0x0e, 0x00, 0x00, 0x00, // d: call next
514 0xf3, 0x90, // 12: loop: pause
515 0x0f, 0xae, 0xe8, // 14: lfence
516 0xeb, 0xf9, // 17: jmp loop
517 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19: int3; .align 16
518 0x89, 0x0c, 0x24, // 20: next: mov %ecx, (%esp)
519 0x8b, 0x4c, 0x24, 0x04, // 23: mov 0x4(%esp), %ecx
520 0x89, 0x44, 0x24, 0x04, // 27: mov %eax ,0x4(%esp)
521 0x89, 0xc8, // 2b: mov %ecx, %eax
522 0x59, // 2d: pop %ecx
523 0xc3, // 2e: ret
524 0xcc, // 2f: int3; padding
525 };
526 memcpy(buf, insn, sizeof(insn));
527 }
528
writePlt(uint8_t * buf,const Symbol & sym,uint64_t pltEntryAddr) const529 void RetpolinePic::writePlt(uint8_t *buf, const Symbol &sym,
530 uint64_t pltEntryAddr) const {
531 unsigned relOff = in.relaPlt->entsize * sym.pltIndex;
532 const uint8_t insn[] = {
533 0x50, // pushl %eax
534 0x8b, 0x83, 0, 0, 0, 0, // mov foo@GOT(%ebx), %eax
535 0xe8, 0, 0, 0, 0, // call plt+0x20
536 0xe9, 0, 0, 0, 0, // jmp plt+0x12
537 0x68, 0, 0, 0, 0, // pushl $reloc_offset
538 0xe9, 0, 0, 0, 0, // jmp plt+0
539 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // int3; padding
540 };
541 memcpy(buf, insn, sizeof(insn));
542
543 uint32_t ebx = in.gotPlt->getVA();
544 unsigned off = pltEntryAddr - in.plt->getVA();
545 write32le(buf + 3, sym.getGotPltVA() - ebx);
546 write32le(buf + 8, -off - 12 + 32);
547 write32le(buf + 13, -off - 17 + 18);
548 write32le(buf + 18, relOff);
549 write32le(buf + 23, -off - 27);
550 }
551
RetpolineNoPic()552 RetpolineNoPic::RetpolineNoPic() {
553 pltHeaderSize = 48;
554 pltEntrySize = 32;
555 ipltEntrySize = 32;
556 }
557
writeGotPlt(uint8_t * buf,const Symbol & s) const558 void RetpolineNoPic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
559 write32le(buf, s.getPltVA() + 16);
560 }
561
writePltHeader(uint8_t * buf) const562 void RetpolineNoPic::writePltHeader(uint8_t *buf) const {
563 const uint8_t insn[] = {
564 0xff, 0x35, 0, 0, 0, 0, // 0: pushl GOTPLT+4
565 0x50, // 6: pushl %eax
566 0xa1, 0, 0, 0, 0, // 7: mov GOTPLT+8, %eax
567 0xe8, 0x0f, 0x00, 0x00, 0x00, // c: call next
568 0xf3, 0x90, // 11: loop: pause
569 0x0f, 0xae, 0xe8, // 13: lfence
570 0xeb, 0xf9, // 16: jmp loop
571 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 18: int3
572 0xcc, 0xcc, 0xcc, // 1f: int3; .align 16
573 0x89, 0x0c, 0x24, // 20: next: mov %ecx, (%esp)
574 0x8b, 0x4c, 0x24, 0x04, // 23: mov 0x4(%esp), %ecx
575 0x89, 0x44, 0x24, 0x04, // 27: mov %eax ,0x4(%esp)
576 0x89, 0xc8, // 2b: mov %ecx, %eax
577 0x59, // 2d: pop %ecx
578 0xc3, // 2e: ret
579 0xcc, // 2f: int3; padding
580 };
581 memcpy(buf, insn, sizeof(insn));
582
583 uint32_t gotPlt = in.gotPlt->getVA();
584 write32le(buf + 2, gotPlt + 4);
585 write32le(buf + 8, gotPlt + 8);
586 }
587
writePlt(uint8_t * buf,const Symbol & sym,uint64_t pltEntryAddr) const588 void RetpolineNoPic::writePlt(uint8_t *buf, const Symbol &sym,
589 uint64_t pltEntryAddr) const {
590 unsigned relOff = in.relaPlt->entsize * sym.pltIndex;
591 const uint8_t insn[] = {
592 0x50, // 0: pushl %eax
593 0xa1, 0, 0, 0, 0, // 1: mov foo_in_GOT, %eax
594 0xe8, 0, 0, 0, 0, // 6: call plt+0x20
595 0xe9, 0, 0, 0, 0, // b: jmp plt+0x11
596 0x68, 0, 0, 0, 0, // 10: pushl $reloc_offset
597 0xe9, 0, 0, 0, 0, // 15: jmp plt+0
598 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1a: int3; padding
599 0xcc, // 1f: int3; padding
600 };
601 memcpy(buf, insn, sizeof(insn));
602
603 unsigned off = pltEntryAddr - in.plt->getVA();
604 write32le(buf + 2, sym.getGotPltVA());
605 write32le(buf + 7, -off - 11 + 32);
606 write32le(buf + 12, -off - 16 + 17);
607 write32le(buf + 17, relOff);
608 write32le(buf + 22, -off - 26);
609 }
610
getX86TargetInfo()611 TargetInfo *getX86TargetInfo() {
612 if (config->zRetpolineplt) {
613 if (config->isPic) {
614 static RetpolinePic t;
615 return &t;
616 }
617 static RetpolineNoPic t;
618 return &t;
619 }
620
621 if (config->andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT) {
622 static IntelIBT t;
623 return &t;
624 }
625
626 static X86 t;
627 return &t;
628 }
629
630 } // namespace elf
631 } // namespace lld
632