1 //===- X86.cpp ------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "InputFiles.h"
10 #include "Symbols.h"
11 #include "SyntheticSections.h"
12 #include "Target.h"
13 #include "lld/Common/ErrorHandler.h"
14 #include "llvm/Support/Endian.h"
15
16 using namespace llvm;
17 using namespace llvm::support::endian;
18 using namespace llvm::ELF;
19 using namespace lld;
20 using namespace lld::elf;
21
22 namespace {
23 class X86 : public TargetInfo {
24 public:
25 X86();
26 int getTlsGdRelaxSkip(RelType type) const override;
27 RelExpr getRelExpr(RelType type, const Symbol &s,
28 const uint8_t *loc) const override;
29 int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
30 void writeGotPltHeader(uint8_t *buf) const override;
31 RelType getDynRel(RelType type) const override;
32 void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
33 void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
34 void writePltHeader(uint8_t *buf) const override;
35 void writePlt(uint8_t *buf, const Symbol &sym,
36 uint64_t pltEntryAddr) const override;
37 void relocate(uint8_t *loc, const Relocation &rel,
38 uint64_t val) const override;
39
40 RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
41 void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
42 uint64_t val) const override;
43 void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
44 uint64_t val) const override;
45 void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
46 uint64_t val) const override;
47 void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel,
48 uint64_t val) const override;
49 };
50 } // namespace
51
X86()52 X86::X86() {
53 copyRel = R_386_COPY;
54 gotRel = R_386_GLOB_DAT;
55 noneRel = R_386_NONE;
56 pltRel = R_386_JUMP_SLOT;
57 iRelativeRel = R_386_IRELATIVE;
58 relativeRel = R_386_RELATIVE;
59 symbolicRel = R_386_32;
60 tlsGotRel = R_386_TLS_TPOFF;
61 tlsModuleIndexRel = R_386_TLS_DTPMOD32;
62 tlsOffsetRel = R_386_TLS_DTPOFF32;
63 pltHeaderSize = 16;
64 pltEntrySize = 16;
65 ipltEntrySize = 16;
66 trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3
67
68 // Align to the non-PAE large page size (known as a superpage or huge page).
69 // FreeBSD automatically promotes large, superpage-aligned allocations.
70 defaultImageBase = 0x400000;
71 }
72
getTlsGdRelaxSkip(RelType type) const73 int X86::getTlsGdRelaxSkip(RelType type) const {
74 return 2;
75 }
76
getRelExpr(RelType type,const Symbol & s,const uint8_t * loc) const77 RelExpr X86::getRelExpr(RelType type, const Symbol &s,
78 const uint8_t *loc) const {
79 // There are 4 different TLS variable models with varying degrees of
80 // flexibility and performance. LocalExec and InitialExec models are fast but
81 // less-flexible models. If they are in use, we set DF_STATIC_TLS flag in the
82 // dynamic section to let runtime know about that.
83 if (type == R_386_TLS_LE || type == R_386_TLS_LE_32 || type == R_386_TLS_IE ||
84 type == R_386_TLS_GOTIE)
85 config->hasStaticTlsModel = true;
86
87 switch (type) {
88 case R_386_8:
89 case R_386_16:
90 case R_386_32:
91 return R_ABS;
92 case R_386_TLS_LDO_32:
93 return R_DTPREL;
94 case R_386_TLS_GD:
95 return R_TLSGD_GOTPLT;
96 case R_386_TLS_LDM:
97 return R_TLSLD_GOTPLT;
98 case R_386_PLT32:
99 return R_PLT_PC;
100 case R_386_PC8:
101 case R_386_PC16:
102 case R_386_PC32:
103 return R_PC;
104 case R_386_GOTPC:
105 return R_GOTPLTONLY_PC;
106 case R_386_TLS_IE:
107 return R_GOT;
108 case R_386_GOT32:
109 case R_386_GOT32X:
110 // These relocations are arguably mis-designed because their calculations
111 // depend on the instructions they are applied to. This is bad because we
112 // usually don't care about whether the target section contains valid
113 // machine instructions or not. But this is part of the documented ABI, so
114 // we had to implement as the standard requires.
115 //
116 // x86 does not support PC-relative data access. Therefore, in order to
117 // access GOT contents, a GOT address needs to be known at link-time
118 // (which means non-PIC) or compilers have to emit code to get a GOT
119 // address at runtime (which means code is position-independent but
120 // compilers need to emit extra code for each GOT access.) This decision
121 // is made at compile-time. In the latter case, compilers emit code to
122 // load a GOT address to a register, which is usually %ebx.
123 //
124 // So, there are two ways to refer to symbol foo's GOT entry: foo@GOT or
125 // foo@GOT(%ebx).
126 //
127 // foo@GOT is not usable in PIC. If we are creating a PIC output and if we
128 // find such relocation, we should report an error. foo@GOT is resolved to
129 // an *absolute* address of foo's GOT entry, because both GOT address and
130 // foo's offset are known. In other words, it's G + A.
131 //
132 // foo@GOT(%ebx) needs to be resolved to a *relative* offset from a GOT to
133 // foo's GOT entry in the table, because GOT address is not known but foo's
134 // offset in the table is known. It's G + A - GOT.
135 //
136 // It's unfortunate that compilers emit the same relocation for these
137 // different use cases. In order to distinguish them, we have to read a
138 // machine instruction.
139 //
140 // The following code implements it. We assume that Loc[0] is the first byte
141 // of a displacement or an immediate field of a valid machine
142 // instruction. That means a ModRM byte is at Loc[-1]. By taking a look at
143 // the byte, we can determine whether the instruction uses the operand as an
144 // absolute address (R_GOT) or a register-relative address (R_GOTPLT).
145 return (loc[-1] & 0xc7) == 0x5 ? R_GOT : R_GOTPLT;
146 case R_386_TLS_GOTIE:
147 return R_GOTPLT;
148 case R_386_GOTOFF:
149 return R_GOTPLTREL;
150 case R_386_TLS_LE:
151 return R_TPREL;
152 case R_386_TLS_LE_32:
153 return R_TPREL_NEG;
154 case R_386_NONE:
155 return R_NONE;
156 default:
157 error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
158 ") against symbol " + toString(s));
159 return R_NONE;
160 }
161 }
162
adjustTlsExpr(RelType type,RelExpr expr) const163 RelExpr X86::adjustTlsExpr(RelType type, RelExpr expr) const {
164 switch (expr) {
165 default:
166 return expr;
167 case R_RELAX_TLS_GD_TO_IE:
168 return R_RELAX_TLS_GD_TO_IE_GOTPLT;
169 case R_RELAX_TLS_GD_TO_LE:
170 return R_RELAX_TLS_GD_TO_LE_NEG;
171 }
172 }
173
writeGotPltHeader(uint8_t * buf) const174 void X86::writeGotPltHeader(uint8_t *buf) const {
175 write32le(buf, mainPart->dynamic->getVA());
176 }
177
writeGotPlt(uint8_t * buf,const Symbol & s) const178 void X86::writeGotPlt(uint8_t *buf, const Symbol &s) const {
179 // Entries in .got.plt initially points back to the corresponding
180 // PLT entries with a fixed offset to skip the first instruction.
181 write32le(buf, s.getPltVA() + 6);
182 }
183
writeIgotPlt(uint8_t * buf,const Symbol & s) const184 void X86::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
185 // An x86 entry is the address of the ifunc resolver function.
186 write32le(buf, s.getVA());
187 }
188
getDynRel(RelType type) const189 RelType X86::getDynRel(RelType type) const {
190 if (type == R_386_TLS_LE)
191 return R_386_TLS_TPOFF;
192 if (type == R_386_TLS_LE_32)
193 return R_386_TLS_TPOFF32;
194 return type;
195 }
196
writePltHeader(uint8_t * buf) const197 void X86::writePltHeader(uint8_t *buf) const {
198 if (config->isPic) {
199 const uint8_t v[] = {
200 0xff, 0xb3, 0x04, 0x00, 0x00, 0x00, // pushl 4(%ebx)
201 0xff, 0xa3, 0x08, 0x00, 0x00, 0x00, // jmp *8(%ebx)
202 0x90, 0x90, 0x90, 0x90 // nop
203 };
204 memcpy(buf, v, sizeof(v));
205 return;
206 }
207
208 const uint8_t pltData[] = {
209 0xff, 0x35, 0, 0, 0, 0, // pushl (GOTPLT+4)
210 0xff, 0x25, 0, 0, 0, 0, // jmp *(GOTPLT+8)
211 0x90, 0x90, 0x90, 0x90, // nop
212 };
213 memcpy(buf, pltData, sizeof(pltData));
214 uint32_t gotPlt = in.gotPlt->getVA();
215 write32le(buf + 2, gotPlt + 4);
216 write32le(buf + 8, gotPlt + 8);
217 }
218
writePlt(uint8_t * buf,const Symbol & sym,uint64_t pltEntryAddr) const219 void X86::writePlt(uint8_t *buf, const Symbol &sym,
220 uint64_t pltEntryAddr) const {
221 unsigned relOff = in.relaPlt->entsize * sym.pltIndex;
222 if (config->isPic) {
223 const uint8_t inst[] = {
224 0xff, 0xa3, 0, 0, 0, 0, // jmp *foo@GOT(%ebx)
225 0x68, 0, 0, 0, 0, // pushl $reloc_offset
226 0xe9, 0, 0, 0, 0, // jmp .PLT0@PC
227 };
228 memcpy(buf, inst, sizeof(inst));
229 write32le(buf + 2, sym.getGotPltVA() - in.gotPlt->getVA());
230 } else {
231 const uint8_t inst[] = {
232 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT
233 0x68, 0, 0, 0, 0, // pushl $reloc_offset
234 0xe9, 0, 0, 0, 0, // jmp .PLT0@PC
235 };
236 memcpy(buf, inst, sizeof(inst));
237 write32le(buf + 2, sym.getGotPltVA());
238 }
239
240 write32le(buf + 7, relOff);
241 write32le(buf + 12, in.plt->getVA() - pltEntryAddr - 16);
242 }
243
getImplicitAddend(const uint8_t * buf,RelType type) const244 int64_t X86::getImplicitAddend(const uint8_t *buf, RelType type) const {
245 switch (type) {
246 case R_386_8:
247 case R_386_PC8:
248 return SignExtend64<8>(*buf);
249 case R_386_16:
250 case R_386_PC16:
251 return SignExtend64<16>(read16le(buf));
252 case R_386_32:
253 case R_386_GLOB_DAT:
254 case R_386_GOT32:
255 case R_386_GOT32X:
256 case R_386_GOTOFF:
257 case R_386_GOTPC:
258 case R_386_IRELATIVE:
259 case R_386_PC32:
260 case R_386_PLT32:
261 case R_386_RELATIVE:
262 case R_386_TLS_DTPMOD32:
263 case R_386_TLS_DTPOFF32:
264 case R_386_TLS_LDO_32:
265 case R_386_TLS_LDM:
266 case R_386_TLS_IE:
267 case R_386_TLS_IE_32:
268 case R_386_TLS_LE:
269 case R_386_TLS_LE_32:
270 case R_386_TLS_GD:
271 case R_386_TLS_GD_32:
272 case R_386_TLS_GOTIE:
273 case R_386_TLS_TPOFF:
274 case R_386_TLS_TPOFF32:
275 return SignExtend64<32>(read32le(buf));
276 case R_386_NONE:
277 case R_386_JUMP_SLOT:
278 // These relocations are defined as not having an implicit addend.
279 return 0;
280 default:
281 internalLinkerError(getErrorLocation(buf),
282 "cannot read addend for relocation " + toString(type));
283 return 0;
284 }
285 }
286
relocate(uint8_t * loc,const Relocation & rel,uint64_t val) const287 void X86::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
288 switch (rel.type) {
289 case R_386_8:
290 // R_386_{PC,}{8,16} are not part of the i386 psABI, but they are
291 // being used for some 16-bit programs such as boot loaders, so
292 // we want to support them.
293 checkIntUInt(loc, val, 8, rel);
294 *loc = val;
295 break;
296 case R_386_PC8:
297 checkInt(loc, val, 8, rel);
298 *loc = val;
299 break;
300 case R_386_16:
301 checkIntUInt(loc, val, 16, rel);
302 write16le(loc, val);
303 break;
304 case R_386_PC16:
305 // R_386_PC16 is normally used with 16 bit code. In that situation
306 // the PC is 16 bits, just like the addend. This means that it can
307 // point from any 16 bit address to any other if the possibility
308 // of wrapping is included.
309 // The only restriction we have to check then is that the destination
310 // address fits in 16 bits. That is impossible to do here. The problem is
311 // that we are passed the final value, which already had the
312 // current location subtracted from it.
313 // We just check that Val fits in 17 bits. This misses some cases, but
314 // should have no false positives.
315 checkInt(loc, val, 17, rel);
316 write16le(loc, val);
317 break;
318 case R_386_32:
319 case R_386_GOT32:
320 case R_386_GOT32X:
321 case R_386_GOTOFF:
322 case R_386_GOTPC:
323 case R_386_PC32:
324 case R_386_PLT32:
325 case R_386_RELATIVE:
326 case R_386_TLS_DTPMOD32:
327 case R_386_TLS_DTPOFF32:
328 case R_386_TLS_GD:
329 case R_386_TLS_GOTIE:
330 case R_386_TLS_IE:
331 case R_386_TLS_LDM:
332 case R_386_TLS_LDO_32:
333 case R_386_TLS_LE:
334 case R_386_TLS_LE_32:
335 case R_386_TLS_TPOFF:
336 case R_386_TLS_TPOFF32:
337 checkInt(loc, val, 32, rel);
338 write32le(loc, val);
339 break;
340 default:
341 llvm_unreachable("unknown relocation");
342 }
343 }
344
relaxTlsGdToLe(uint8_t * loc,const Relocation &,uint64_t val) const345 void X86::relaxTlsGdToLe(uint8_t *loc, const Relocation &, uint64_t val) const {
346 // Convert
347 // leal x@tlsgd(, %ebx, 1),
348 // call __tls_get_addr@plt
349 // to
350 // movl %gs:0,%eax
351 // subl $x@ntpoff,%eax
352 const uint8_t inst[] = {
353 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
354 0x81, 0xe8, 0, 0, 0, 0, // subl Val(%ebx), %eax
355 };
356 memcpy(loc - 3, inst, sizeof(inst));
357 write32le(loc + 5, val);
358 }
359
relaxTlsGdToIe(uint8_t * loc,const Relocation &,uint64_t val) const360 void X86::relaxTlsGdToIe(uint8_t *loc, const Relocation &, uint64_t val) const {
361 // Convert
362 // leal x@tlsgd(, %ebx, 1),
363 // call __tls_get_addr@plt
364 // to
365 // movl %gs:0, %eax
366 // addl x@gotntpoff(%ebx), %eax
367 const uint8_t inst[] = {
368 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
369 0x03, 0x83, 0, 0, 0, 0, // addl Val(%ebx), %eax
370 };
371 memcpy(loc - 3, inst, sizeof(inst));
372 write32le(loc + 5, val);
373 }
374
375 // In some conditions, relocations can be optimized to avoid using GOT.
376 // This function does that for Initial Exec to Local Exec case.
relaxTlsIeToLe(uint8_t * loc,const Relocation & rel,uint64_t val) const377 void X86::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
378 uint64_t val) const {
379 // Ulrich's document section 6.2 says that @gotntpoff can
380 // be used with MOVL or ADDL instructions.
381 // @indntpoff is similar to @gotntpoff, but for use in
382 // position dependent code.
383 uint8_t reg = (loc[-1] >> 3) & 7;
384
385 if (rel.type == R_386_TLS_IE) {
386 if (loc[-1] == 0xa1) {
387 // "movl foo@indntpoff,%eax" -> "movl $foo,%eax"
388 // This case is different from the generic case below because
389 // this is a 5 byte instruction while below is 6 bytes.
390 loc[-1] = 0xb8;
391 } else if (loc[-2] == 0x8b) {
392 // "movl foo@indntpoff,%reg" -> "movl $foo,%reg"
393 loc[-2] = 0xc7;
394 loc[-1] = 0xc0 | reg;
395 } else {
396 // "addl foo@indntpoff,%reg" -> "addl $foo,%reg"
397 loc[-2] = 0x81;
398 loc[-1] = 0xc0 | reg;
399 }
400 } else {
401 assert(rel.type == R_386_TLS_GOTIE);
402 if (loc[-2] == 0x8b) {
403 // "movl foo@gottpoff(%rip),%reg" -> "movl $foo,%reg"
404 loc[-2] = 0xc7;
405 loc[-1] = 0xc0 | reg;
406 } else {
407 // "addl foo@gotntpoff(%rip),%reg" -> "leal foo(%reg),%reg"
408 loc[-2] = 0x8d;
409 loc[-1] = 0x80 | (reg << 3) | reg;
410 }
411 }
412 write32le(loc, val);
413 }
414
relaxTlsLdToLe(uint8_t * loc,const Relocation & rel,uint64_t val) const415 void X86::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel,
416 uint64_t val) const {
417 if (rel.type == R_386_TLS_LDO_32) {
418 write32le(loc, val);
419 return;
420 }
421
422 // Convert
423 // leal foo(%reg),%eax
424 // call ___tls_get_addr
425 // to
426 // movl %gs:0,%eax
427 // nop
428 // leal 0(%esi,1),%esi
429 const uint8_t inst[] = {
430 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
431 0x90, // nop
432 0x8d, 0x74, 0x26, 0x00, // leal 0(%esi,1),%esi
433 };
434 memcpy(loc - 2, inst, sizeof(inst));
435 }
436
437 // If Intel Indirect Branch Tracking is enabled, we have to emit special PLT
438 // entries containing endbr32 instructions. A PLT entry will be split into two
439 // parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt).
440 namespace {
441 class IntelIBT : public X86 {
442 public:
443 IntelIBT();
444 void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
445 void writePlt(uint8_t *buf, const Symbol &sym,
446 uint64_t pltEntryAddr) const override;
447 void writeIBTPlt(uint8_t *buf, size_t numEntries) const override;
448
449 static const unsigned IBTPltHeaderSize = 16;
450 };
451 } // namespace
452
IntelIBT()453 IntelIBT::IntelIBT() { pltHeaderSize = 0; }
454
writeGotPlt(uint8_t * buf,const Symbol & s) const455 void IntelIBT::writeGotPlt(uint8_t *buf, const Symbol &s) const {
456 uint64_t va =
457 in.ibtPlt->getVA() + IBTPltHeaderSize + s.pltIndex * pltEntrySize;
458 write32le(buf, va);
459 }
460
writePlt(uint8_t * buf,const Symbol & sym,uint64_t) const461 void IntelIBT::writePlt(uint8_t *buf, const Symbol &sym,
462 uint64_t /*pltEntryAddr*/) const {
463 if (config->isPic) {
464 const uint8_t inst[] = {
465 0xf3, 0x0f, 0x1e, 0xfb, // endbr32
466 0xff, 0xa3, 0, 0, 0, 0, // jmp *name@GOT(%ebx)
467 0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
468 };
469 memcpy(buf, inst, sizeof(inst));
470 write32le(buf + 6, sym.getGotPltVA() - in.gotPlt->getVA());
471 return;
472 }
473
474 const uint8_t inst[] = {
475 0xf3, 0x0f, 0x1e, 0xfb, // endbr32
476 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT
477 0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
478 };
479 memcpy(buf, inst, sizeof(inst));
480 write32le(buf + 6, sym.getGotPltVA());
481 }
482
writeIBTPlt(uint8_t * buf,size_t numEntries) const483 void IntelIBT::writeIBTPlt(uint8_t *buf, size_t numEntries) const {
484 writePltHeader(buf);
485 buf += IBTPltHeaderSize;
486
487 const uint8_t inst[] = {
488 0xf3, 0x0f, 0x1e, 0xfb, // endbr32
489 0x68, 0, 0, 0, 0, // pushl $reloc_offset
490 0xe9, 0, 0, 0, 0, // jmpq .PLT0@PC
491 0x66, 0x90, // nop
492 };
493
494 for (size_t i = 0; i < numEntries; ++i) {
495 memcpy(buf, inst, sizeof(inst));
496 write32le(buf + 5, i * sizeof(object::ELF32LE::Rel));
497 write32le(buf + 10, -pltHeaderSize - sizeof(inst) * i - 30);
498 buf += sizeof(inst);
499 }
500 }
501
502 namespace {
503 class RetpolinePic : public X86 {
504 public:
505 RetpolinePic();
506 void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
507 void writePltHeader(uint8_t *buf) const override;
508 void writePlt(uint8_t *buf, const Symbol &sym,
509 uint64_t pltEntryAddr) const override;
510 };
511
512 class RetpolineNoPic : public X86 {
513 public:
514 RetpolineNoPic();
515 void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
516 void writePltHeader(uint8_t *buf) const override;
517 void writePlt(uint8_t *buf, const Symbol &sym,
518 uint64_t pltEntryAddr) const override;
519 };
520 } // namespace
521
RetpolinePic()522 RetpolinePic::RetpolinePic() {
523 pltHeaderSize = 48;
524 pltEntrySize = 32;
525 ipltEntrySize = 32;
526 }
527
writeGotPlt(uint8_t * buf,const Symbol & s) const528 void RetpolinePic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
529 write32le(buf, s.getPltVA() + 17);
530 }
531
writePltHeader(uint8_t * buf) const532 void RetpolinePic::writePltHeader(uint8_t *buf) const {
533 const uint8_t insn[] = {
534 0xff, 0xb3, 4, 0, 0, 0, // 0: pushl 4(%ebx)
535 0x50, // 6: pushl %eax
536 0x8b, 0x83, 8, 0, 0, 0, // 7: mov 8(%ebx), %eax
537 0xe8, 0x0e, 0x00, 0x00, 0x00, // d: call next
538 0xf3, 0x90, // 12: loop: pause
539 0x0f, 0xae, 0xe8, // 14: lfence
540 0xeb, 0xf9, // 17: jmp loop
541 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19: int3; .align 16
542 0x89, 0x0c, 0x24, // 20: next: mov %ecx, (%esp)
543 0x8b, 0x4c, 0x24, 0x04, // 23: mov 0x4(%esp), %ecx
544 0x89, 0x44, 0x24, 0x04, // 27: mov %eax ,0x4(%esp)
545 0x89, 0xc8, // 2b: mov %ecx, %eax
546 0x59, // 2d: pop %ecx
547 0xc3, // 2e: ret
548 0xcc, // 2f: int3; padding
549 };
550 memcpy(buf, insn, sizeof(insn));
551 }
552
writePlt(uint8_t * buf,const Symbol & sym,uint64_t pltEntryAddr) const553 void RetpolinePic::writePlt(uint8_t *buf, const Symbol &sym,
554 uint64_t pltEntryAddr) const {
555 unsigned relOff = in.relaPlt->entsize * sym.pltIndex;
556 const uint8_t insn[] = {
557 0x50, // pushl %eax
558 0x8b, 0x83, 0, 0, 0, 0, // mov foo@GOT(%ebx), %eax
559 0xe8, 0, 0, 0, 0, // call plt+0x20
560 0xe9, 0, 0, 0, 0, // jmp plt+0x12
561 0x68, 0, 0, 0, 0, // pushl $reloc_offset
562 0xe9, 0, 0, 0, 0, // jmp plt+0
563 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // int3; padding
564 };
565 memcpy(buf, insn, sizeof(insn));
566
567 uint32_t ebx = in.gotPlt->getVA();
568 unsigned off = pltEntryAddr - in.plt->getVA();
569 write32le(buf + 3, sym.getGotPltVA() - ebx);
570 write32le(buf + 8, -off - 12 + 32);
571 write32le(buf + 13, -off - 17 + 18);
572 write32le(buf + 18, relOff);
573 write32le(buf + 23, -off - 27);
574 }
575
RetpolineNoPic()576 RetpolineNoPic::RetpolineNoPic() {
577 pltHeaderSize = 48;
578 pltEntrySize = 32;
579 ipltEntrySize = 32;
580 }
581
writeGotPlt(uint8_t * buf,const Symbol & s) const582 void RetpolineNoPic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
583 write32le(buf, s.getPltVA() + 16);
584 }
585
writePltHeader(uint8_t * buf) const586 void RetpolineNoPic::writePltHeader(uint8_t *buf) const {
587 const uint8_t insn[] = {
588 0xff, 0x35, 0, 0, 0, 0, // 0: pushl GOTPLT+4
589 0x50, // 6: pushl %eax
590 0xa1, 0, 0, 0, 0, // 7: mov GOTPLT+8, %eax
591 0xe8, 0x0f, 0x00, 0x00, 0x00, // c: call next
592 0xf3, 0x90, // 11: loop: pause
593 0x0f, 0xae, 0xe8, // 13: lfence
594 0xeb, 0xf9, // 16: jmp loop
595 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 18: int3
596 0xcc, 0xcc, 0xcc, // 1f: int3; .align 16
597 0x89, 0x0c, 0x24, // 20: next: mov %ecx, (%esp)
598 0x8b, 0x4c, 0x24, 0x04, // 23: mov 0x4(%esp), %ecx
599 0x89, 0x44, 0x24, 0x04, // 27: mov %eax ,0x4(%esp)
600 0x89, 0xc8, // 2b: mov %ecx, %eax
601 0x59, // 2d: pop %ecx
602 0xc3, // 2e: ret
603 0xcc, // 2f: int3; padding
604 };
605 memcpy(buf, insn, sizeof(insn));
606
607 uint32_t gotPlt = in.gotPlt->getVA();
608 write32le(buf + 2, gotPlt + 4);
609 write32le(buf + 8, gotPlt + 8);
610 }
611
writePlt(uint8_t * buf,const Symbol & sym,uint64_t pltEntryAddr) const612 void RetpolineNoPic::writePlt(uint8_t *buf, const Symbol &sym,
613 uint64_t pltEntryAddr) const {
614 unsigned relOff = in.relaPlt->entsize * sym.pltIndex;
615 const uint8_t insn[] = {
616 0x50, // 0: pushl %eax
617 0xa1, 0, 0, 0, 0, // 1: mov foo_in_GOT, %eax
618 0xe8, 0, 0, 0, 0, // 6: call plt+0x20
619 0xe9, 0, 0, 0, 0, // b: jmp plt+0x11
620 0x68, 0, 0, 0, 0, // 10: pushl $reloc_offset
621 0xe9, 0, 0, 0, 0, // 15: jmp plt+0
622 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1a: int3; padding
623 0xcc, // 1f: int3; padding
624 };
625 memcpy(buf, insn, sizeof(insn));
626
627 unsigned off = pltEntryAddr - in.plt->getVA();
628 write32le(buf + 2, sym.getGotPltVA());
629 write32le(buf + 7, -off - 11 + 32);
630 write32le(buf + 12, -off - 16 + 17);
631 write32le(buf + 17, relOff);
632 write32le(buf + 22, -off - 26);
633 }
634
getX86TargetInfo()635 TargetInfo *elf::getX86TargetInfo() {
636 if (config->zRetpolineplt) {
637 if (config->isPic) {
638 static RetpolinePic t;
639 return &t;
640 }
641 static RetpolineNoPic t;
642 return &t;
643 }
644
645 if (config->andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT) {
646 static IntelIBT t;
647 return &t;
648 }
649
650 static X86 t;
651 return &t;
652 }
653