//===- X86_64.cpp ---------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "OutputSections.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "lld/Common/ErrorHandler.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/Support/Endian.h"

using namespace llvm;
using namespace llvm::object;
using namespace llvm::support::endian;
using namespace llvm::ELF;
using namespace lld;
using namespace lld::elf;

namespace {
class X86_64 : public TargetInfo {
public:
  X86_64();
  int getTlsGdRelaxSkip(RelType type) const override;
  RelExpr getRelExpr(RelType type, const Symbol &s,
                     const uint8_t *loc) const override;
  RelType getDynRel(RelType type) const override;
  void writeGotPltHeader(uint8_t *buf) const override;
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
  void relocate(uint8_t *loc, const Relocation &rel,
                uint64_t val) const override;
  int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
  void applyJumpInstrMod(uint8_t *loc, JumpModType type,
                         unsigned size) const override;
  RelExpr adjustGotPcExpr(RelType type, int64_t addend,
                          const uint8_t *loc) const override;
  void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;
  bool adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end,
                                        uint8_t stOther) const override;
  bool deleteFallThruJmpInsn(InputSection &is, InputFile *file,
                             InputSection *nextIS) const override;
};
} // namespace
// This is a vector of NOP instructions of sizes from 1 to 9 bytes. The
// appropriately sized instructions are used to fill the gaps between sections
// that are executed during fall through.
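// Note: these are the multi-byte NOP encodings recommended by the Intel SDM;
// one appropriately sized instruction is generally cheaper to decode than a
// run of single-byte 0x90s when the gap is actually executed.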
static const std::vector<std::vector<uint8_t>> nopInstructions = {
    {0x90},
    {0x66, 0x90},
    {0x0f, 0x1f, 0x00},
    {0x0f, 0x1f, 0x40, 0x00},
    {0x0f, 0x1f, 0x44, 0x00, 0x00},
    {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},
    {0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
    {0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
    {0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}};

X86_64::X86_64() {
  copyRel = R_X86_64_COPY;
  gotRel = R_X86_64_GLOB_DAT;
  pltRel = R_X86_64_JUMP_SLOT;
  relativeRel = R_X86_64_RELATIVE;
  iRelativeRel = R_X86_64_IRELATIVE;
  symbolicRel = R_X86_64_64;
  tlsDescRel = R_X86_64_TLSDESC;
  tlsGotRel = R_X86_64_TPOFF64;
  tlsModuleIndexRel = R_X86_64_DTPMOD64;
  tlsOffsetRel = R_X86_64_DTPOFF64;
  gotBaseSymInGotPlt = true;
  gotEntrySize = 8;
  pltHeaderSize = 16;
  pltEntrySize = 16;
  ipltEntrySize = 16;
  trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3
  nopInstrs = nopInstructions;

  // Align to the large page size (known as a superpage or huge page).
  // FreeBSD automatically promotes large, superpage-aligned allocations.
  defaultImageBase = 0x200000;
}

int X86_64::getTlsGdRelaxSkip(RelType type) const {
  // TLSDESC relocations are processed separately. See relaxTlsGdToLe below.
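  // For the traditional TLSGD/TLSLD sequences, relaxation rewrites both the
  // leaq carrying the TLS relocation and the following __tls_get_addr call
  // as a unit, so the call's own relocation is consumed as well, hence 2.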
  return type == R_X86_64_GOTPC32_TLSDESC || type == R_X86_64_TLSDESC_CALL ? 1
                                                                           : 2;
}

// Opcodes for the different X86_64 jmp instructions.
enum JmpInsnOpcode : uint32_t {
  J_JMP_32,
  J_JNE_32,
  J_JE_32,
  J_JG_32,
  J_JGE_32,
  J_JB_32,
  J_JBE_32,
  J_JL_32,
  J_JLE_32,
  J_JA_32,
  J_JAE_32,
  J_UNKNOWN,
};

// Given the first (optional) and second byte of the insn's opcode, this
// returns the corresponding enum value.
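// For example, the two-byte opcode {0x0f, 0x84} (32-bit je) yields J_JE_32,
// while {0xe9} (jmp rel32) is recognized from the second byte alone, which is
// why first may be null.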
static JmpInsnOpcode getJmpInsnType(const uint8_t *first,
                                    const uint8_t *second) {
  if (*second == 0xe9)
    return J_JMP_32;

  if (first == nullptr)
    return J_UNKNOWN;

  if (*first == 0x0f) {
    switch (*second) {
    case 0x84:
      return J_JE_32;
    case 0x85:
      return J_JNE_32;
    case 0x8f:
      return J_JG_32;
    case 0x8d:
      return J_JGE_32;
    case 0x82:
      return J_JB_32;
    case 0x86:
      return J_JBE_32;
    case 0x8c:
      return J_JL_32;
    case 0x8e:
      return J_JLE_32;
    case 0x87:
      return J_JA_32;
    case 0x83:
      return J_JAE_32;
    }
  }
  return J_UNKNOWN;
}

// Returns the index of the relocation in input section IS at the given
// offset, or the size of the relocation vector if no such relocation is
// found.
static unsigned getRelocationWithOffset(const InputSection &is,
                                        uint64_t offset) {
  unsigned size = is.relocs().size();
  for (unsigned i = size - 1; i + 1 > 0; --i) {
    if (is.relocs()[i].offset == offset && is.relocs()[i].expr != R_NONE)
      return i;
  }
  return size;
}

// Returns true if R corresponds to a relocation used for a jump instruction.
// TODO: Once special relocations for relaxable jump instructions are
// available, this should be modified to use those relocations.
static bool isRelocationForJmpInsn(Relocation &R) {
  return R.type == R_X86_64_PLT32 || R.type == R_X86_64_PC32 ||
         R.type == R_X86_64_PC8;
}

// Return true if Relocation R points to the first instruction in the
// next section.
// TODO: Delete this once psABI reserves a new relocation type for fall thru
// jumps.
static bool isFallThruRelocation(InputSection &is, InputFile *file,
                                 InputSection *nextIS, Relocation &r) {
  if (!isRelocationForJmpInsn(r))
    return false;

  uint64_t addrLoc = is.getOutputSection()->addr + is.outSecOff + r.offset;
  uint64_t targetOffset = InputSectionBase::getRelocTargetVA(
      file, r.type, r.addend, addrLoc, *r.sym, r.expr);

  // If this jmp is a fall thru, the target offset is the beginning of the
  // next section.
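  // Note: for the rel32 jumps handled here the addend is typically -4
  // because the CPU resolves the target relative to the end of the 4-byte
  // displacement field; adding 4 back below recovers the absolute target.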
  uint64_t nextSectionOffset =
      nextIS->getOutputSection()->addr + nextIS->outSecOff;
  return (addrLoc + 4 + targetOffset) == nextSectionOffset;
}

// Return the jmp instruction opcode that is the inverse of the given
// opcode. For example, JE inverted is JNE.
static JmpInsnOpcode invertJmpOpcode(const JmpInsnOpcode opcode) {
  switch (opcode) {
  case J_JE_32:
    return J_JNE_32;
  case J_JNE_32:
    return J_JE_32;
  case J_JG_32:
    return J_JLE_32;
  case J_JGE_32:
    return J_JL_32;
  case J_JB_32:
    return J_JAE_32;
  case J_JBE_32:
    return J_JA_32;
  case J_JL_32:
    return J_JGE_32;
  case J_JLE_32:
    return J_JG_32;
  case J_JA_32:
    return J_JBE_32;
  case J_JAE_32:
    return J_JB_32;
  default:
    return J_UNKNOWN;
  }
}

// Deletes the direct jump instruction in input sections that jumps to the
// following section, as it is not required. If there are two consecutive jump
// instructions, it checks if they can be flipped and one can be deleted.
// For example:
// .section .text
// a.BB.foo:
//    ...
//    10: jne aa.BB.foo
//    16: jmp bar
// aa.BB.foo:
//    ...
//
// can be converted to:
// a.BB.foo:
//    ...
//    10: je bar  # jne flipped to je and the jmp is deleted.
// aa.BB.foo:
//    ...
bool X86_64::deleteFallThruJmpInsn(InputSection &is, InputFile *file,
                                   InputSection *nextIS) const {
  const unsigned sizeOfDirectJmpInsn = 5;

  if (nextIS == nullptr)
    return false;

  if (is.getSize() < sizeOfDirectJmpInsn)
    return false;

  // If this jmp insn can be removed, it is the last insn and the
  // relocation is 4 bytes before the end.
  unsigned rIndex = getRelocationWithOffset(is, is.getSize() - 4);
  if (rIndex == is.relocs().size())
    return false;

  Relocation &r = is.relocs()[rIndex];

  // Check if the relocation corresponds to a direct jmp.
  const uint8_t *secContents = is.content().data();
  // If it is not a direct jmp instruction, there is nothing to do here.
  if (*(secContents + r.offset - 1) != 0xe9)
    return false;

  if (isFallThruRelocation(is, file, nextIS, r)) {
    // This is a fall thru and can be deleted.
    r.expr = R_NONE;
    r.offset = 0;
    is.drop_back(sizeOfDirectJmpInsn);
    is.nopFiller = true;
    return true;
  }

  // Now, check if flip and delete is possible.
  const unsigned sizeOfJmpCCInsn = 6;
  // To flip, there must be at least one JmpCC and one direct jmp.
  if (is.getSize() < sizeOfDirectJmpInsn + sizeOfJmpCCInsn)
    return false;

  unsigned rbIndex =
      getRelocationWithOffset(is, (is.getSize() - sizeOfDirectJmpInsn - 4));
  if (rbIndex == is.relocs().size())
    return false;

  Relocation &rB = is.relocs()[rbIndex];

  const uint8_t *jmpInsnB = secContents + rB.offset - 1;
  JmpInsnOpcode jmpOpcodeB = getJmpInsnType(jmpInsnB - 1, jmpInsnB);
  if (jmpOpcodeB == J_UNKNOWN)
    return false;

  if (!isFallThruRelocation(is, file, nextIS, rB))
    return false;

  // The jmpCC jumps to the fall-thru block, so the branch can be flipped
  // and the direct jmp deleted.
  JmpInsnOpcode jInvert = invertJmpOpcode(jmpOpcodeB);
  if (jInvert == J_UNKNOWN)
    return false;
  is.jumpInstrMod = make<JumpInstrMod>();
  *is.jumpInstrMod = {rB.offset - 1, jInvert, 4};
  // Move r's values to rB except the offset.
  rB = {r.expr, r.type, rB.offset, r.addend, r.sym};
  // Cancel r.
  r.expr = R_NONE;
  r.offset = 0;
  is.drop_back(sizeOfDirectJmpInsn);
  is.nopFiller = true;
  return true;
}

RelExpr X86_64::getRelExpr(RelType type, const Symbol &s,
                           const uint8_t *loc) const {
  switch (type) {
  case R_X86_64_8:
  case R_X86_64_16:
  case R_X86_64_32:
  case R_X86_64_32S:
  case R_X86_64_64:
    return R_ABS;
  case R_X86_64_DTPOFF32:
  case R_X86_64_DTPOFF64:
    return R_DTPREL;
  case R_X86_64_TPOFF32:
    return R_TPREL;
  case R_X86_64_TLSDESC_CALL:
    return R_TLSDESC_CALL;
  case R_X86_64_TLSLD:
    return R_TLSLD_PC;
  case R_X86_64_TLSGD:
    return R_TLSGD_PC;
  case R_X86_64_SIZE32:
  case R_X86_64_SIZE64:
    return R_SIZE;
  case R_X86_64_PLT32:
    return R_PLT_PC;
  case R_X86_64_PC8:
  case R_X86_64_PC16:
  case R_X86_64_PC32:
  case R_X86_64_PC64:
    return R_PC;
  case R_X86_64_GOT32:
  case R_X86_64_GOT64:
    return R_GOTPLT;
  case R_X86_64_GOTPC32_TLSDESC:
    return R_TLSDESC_PC;
  case R_X86_64_GOTPCREL:
  case R_X86_64_GOTPCRELX:
  case R_X86_64_REX_GOTPCRELX:
  case R_X86_64_GOTTPOFF:
    return R_GOT_PC;
  case R_X86_64_GOTOFF64:
    return R_GOTPLTREL;
  case R_X86_64_PLTOFF64:
    return R_PLT_GOTPLT;
  case R_X86_64_GOTPC32:
  case R_X86_64_GOTPC64:
    return R_GOTPLTONLY_PC;
  case R_X86_64_NONE:
    return R_NONE;
  default:
    error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
          ") against symbol " + toString(s));
    return R_NONE;
  }
}

void X86_64::writeGotPltHeader(uint8_t *buf) const {
  // The first entry holds the value of _DYNAMIC. It is not clear why that is
  // required, but it is documented in the psabi and the glibc dynamic linker
  // seems to use it (note that this is relevant for linking ld.so, not any
  // other program).
  write64le(buf, mainPart->dynamic->getVA());
}

void X86_64::writeGotPlt(uint8_t *buf, const Symbol &s) const {
  // See comments in X86::writeGotPlt.
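  // The entry initially points 6 bytes past the start of the symbol's PLT
  // entry, i.e. at the pushq that follows the 6-byte jmpq (see writePlt), so
  // the first call goes through the lazy-binding path.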
  write64le(buf, s.getPltVA() + 6);
}

void X86_64::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
  // An x86 entry is the address of the ifunc resolver function (for -z rel).
  if (config->writeAddends)
    write64le(buf, s.getVA());
}

void X86_64::writePltHeader(uint8_t *buf) const {
  const uint8_t pltData[] = {
      0xff, 0x35, 0, 0, 0, 0, // pushq GOTPLT+8(%rip)
      0xff, 0x25, 0, 0, 0, 0, // jmp *GOTPLT+16(%rip)
      0x0f, 0x1f, 0x40, 0x00, // nop
  };
  memcpy(buf, pltData, sizeof(pltData));
  uint64_t gotPlt = in.gotPlt->getVA();
  uint64_t plt = in.ibtPlt ? in.ibtPlt->getVA() : in.plt->getVA();
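  // Each displacement is relative to the end of its 6-byte instruction:
  // (gotPlt + 8) - (plt + 6) and (gotPlt + 16) - (plt + 12).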
  write32le(buf + 2, gotPlt - plt + 2); // GOTPLT+8
  write32le(buf + 8, gotPlt - plt + 4); // GOTPLT+16
}

void X86_64::writePlt(uint8_t *buf, const Symbol &sym,
                      uint64_t pltEntryAddr) const {
  const uint8_t inst[] = {
      0xff, 0x25, 0, 0, 0, 0, // jmpq *got(%rip)
      0x68, 0, 0, 0, 0,       // pushq <relocation index>
      0xe9, 0, 0, 0, 0,       // jmpq plt[0]
  };
  memcpy(buf, inst, sizeof(inst));

  write32le(buf + 2, sym.getGotPltVA() - pltEntryAddr - 6);
  write32le(buf + 7, sym.getPltIdx());
  write32le(buf + 12, in.plt->getVA() - pltEntryAddr - 16);
}

RelType X86_64::getDynRel(RelType type) const {
  if (type == R_X86_64_64 || type == R_X86_64_PC64 ||
      type == R_X86_64_SIZE32 || type == R_X86_64_SIZE64)
    return type;
  return R_X86_64_NONE;
}

static void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) {
  if (rel.type == R_X86_64_TLSGD) {
    // Convert
    //   .byte 0x66
    //   leaq x@tlsgd(%rip), %rdi
    //   .word 0x6666
    //   rex64
    //   call __tls_get_addr@plt
    // to the following two instructions.
    const uint8_t inst[] = {
        0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00,
        0x00, 0x00,                   // mov %fs:0x0,%rax
        0x48, 0x8d, 0x80, 0, 0, 0, 0, // lea x@tpoff,%rax
    };
    memcpy(loc - 4, inst, sizeof(inst));

    // The original code used a PC-relative relocation, so we have to
    // compensate for the -4 it had in the addend.
    write32le(loc + 8, val + 4);
  } else if (rel.type == R_X86_64_GOTPC32_TLSDESC) {
    // Convert leaq x@tlsdesc(%rip), %REG to movq $x@tpoff, %REG.
    if ((loc[-3] & 0xfb) != 0x48 || loc[-2] != 0x8d ||
        (loc[-1] & 0xc7) != 0x05) {
      errorOrWarn(getErrorLocation(loc - 3) +
                  "R_X86_64_GOTPC32_TLSDESC must be used "
                  "in leaq x@tlsdesc(%rip), %REG");
      return;
    }
    loc[-3] = 0x48 | ((loc[-3] >> 2) & 1);
    loc[-2] = 0xc7;
    loc[-1] = 0xc0 | ((loc[-1] >> 3) & 7);
    write32le(loc, val + 4);
  } else {
    // Convert call *x@tlsdesc(%REG) to xchg ax, ax.
    assert(rel.type == R_X86_64_TLSDESC_CALL);
    loc[0] = 0x66;
    loc[1] = 0x90;
  }
}

static void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, uint64_t val) {
  if (rel.type == R_X86_64_TLSGD) {
    // Convert
    //   .byte 0x66
    //   leaq x@tlsgd(%rip), %rdi
    //   .word 0x6666
    //   rex64
    //   call __tls_get_addr@plt
    // to the following two instructions.
    const uint8_t inst[] = {
        0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00,
        0x00, 0x00,                   // mov %fs:0x0,%rax
        0x48, 0x03, 0x05, 0, 0, 0, 0, // addq x@gottpoff(%rip),%rax
    };
    memcpy(loc - 4, inst, sizeof(inst));

    // Both code sequences are PC relative, but since we are moving the
    // constant forward by 8 bytes we have to subtract 8 from the value.
    write32le(loc + 8, val - 8);
  } else if (rel.type == R_X86_64_GOTPC32_TLSDESC) {
    // Convert leaq x@tlsdesc(%rip), %REG to movq x@gottpoff(%rip), %REG.
    assert(rel.type == R_X86_64_GOTPC32_TLSDESC);
    if ((loc[-3] & 0xfb) != 0x48 || loc[-2] != 0x8d ||
        (loc[-1] & 0xc7) != 0x05) {
      errorOrWarn(getErrorLocation(loc - 3) +
                  "R_X86_64_GOTPC32_TLSDESC must be used "
                  "in leaq x@tlsdesc(%rip), %REG");
      return;
    }
    loc[-2] = 0x8b;
    write32le(loc, val);
  } else {
    // Convert call *x@tlsdesc(%rax) to xchg ax, ax.
    assert(rel.type == R_X86_64_TLSDESC_CALL);
    loc[0] = 0x66;
    loc[1] = 0x90;
  }
}

// Under some conditions, an R_X86_64_GOTTPOFF relocation can be optimized to
// R_X86_64_TPOFF32 so that it does not use the GOT.
static void relaxTlsIeToLe(uint8_t *loc, const Relocation &, uint64_t val) {
  uint8_t *inst = loc - 3;
  uint8_t reg = loc[-1] >> 3;
  uint8_t *regSlot = loc - 1;

  // Note that ADD with RSP or R12 is converted to ADD instead of LEA
  // because LEA with these registers needs 4 bytes to encode and thus
  // wouldn't fit the space.

  if (memcmp(inst, "\x48\x03\x25", 3) == 0) {
    // "addq foo@gottpoff(%rip),%rsp" -> "addq $foo,%rsp"
    memcpy(inst, "\x48\x81\xc4", 3);
  } else if (memcmp(inst, "\x4c\x03\x25", 3) == 0) {
    // "addq foo@gottpoff(%rip),%r12" -> "addq $foo,%r12"
    memcpy(inst, "\x49\x81\xc4", 3);
  } else if (memcmp(inst, "\x4c\x03", 2) == 0) {
    // "addq foo@gottpoff(%rip),%r[8-15]" -> "leaq foo(%r[8-15]),%r[8-15]"
    memcpy(inst, "\x4d\x8d", 2);
    *regSlot = 0x80 | (reg << 3) | reg;
  } else if (memcmp(inst, "\x48\x03", 2) == 0) {
    // "addq foo@gottpoff(%rip),%reg" -> "leaq foo(%reg),%reg"
    memcpy(inst, "\x48\x8d", 2);
    *regSlot = 0x80 | (reg << 3) | reg;
  } else if (memcmp(inst, "\x4c\x8b", 2) == 0) {
    // "movq foo@gottpoff(%rip),%r[8-15]" -> "movq $foo,%r[8-15]"
    memcpy(inst, "\x49\xc7", 2);
    *regSlot = 0xc0 | reg;
  } else if (memcmp(inst, "\x48\x8b", 2) == 0) {
    // "movq foo@gottpoff(%rip),%reg" -> "movq $foo,%reg"
    memcpy(inst, "\x48\xc7", 2);
    *regSlot = 0xc0 | reg;
  } else {
    error(getErrorLocation(loc - 3) +
          "R_X86_64_GOTTPOFF must be used in MOVQ or ADDQ instructions only");
  }

  // The original code used a PC-relative relocation.
  // Need to compensate for the -4 it had in the addend.
  write32le(loc, val + 4);
}

static void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) {
  const uint8_t inst[] = {
      0x66, 0x66, // .word 0x6666
      0x66,       // .byte 0x66
      0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:0,%rax
  };

  if (loc[4] == 0xe8) {
    // Convert
    //   leaq bar@tlsld(%rip), %rdi  # 48 8d 3d <Loc>
    //   callq __tls_get_addr@PLT    # e8 <disp32>
    //   leaq bar@dtpoff(%rax), %rcx
    // to
    //   .word 0x6666
    //   .byte 0x66
    //   mov %fs:0,%rax
    //   leaq bar@tpoff(%rax), %rcx
    memcpy(loc - 3, inst, sizeof(inst));
    return;
  }

  if (loc[4] == 0xff && loc[5] == 0x15) {
    // Convert
    //   leaq x@tlsld(%rip),%rdi               # 48 8d 3d <Loc>
    //   call *__tls_get_addr@GOTPCREL(%rip)   # ff 15 <disp32>
    // to
    //   .long 0x66666666
    //   movq %fs:0,%rax
    // See "Table 11.9: LD -> LE Code Transition (LP64)" in
    // https://raw.githubusercontent.com/wiki/hjl-tools/x86-psABI/x86-64-psABI-1.0.pdf
    loc[-3] = 0x66;
    memcpy(loc - 2, inst, sizeof(inst));
    return;
  }

  error(getErrorLocation(loc - 3) +
        "expected R_X86_64_PLT32 or R_X86_64_GOTPCRELX after R_X86_64_TLSLD");
}

// A JumpInstrMod at a specific offset indicates that the jump instruction
// opcode at that offset must be modified. This is specifically used to relax
// jump instructions with basic block sections. This function looks at the
// JumpMod and effects the change.
void X86_64::applyJumpInstrMod(uint8_t *loc, JumpModType type,
                               unsigned size) const {
  switch (type) {
  case J_JMP_32:
    if (size == 4)
      *loc = 0xe9;
    else
      *loc = 0xeb;
    break;
  case J_JE_32:
    if (size == 4) {
      loc[-1] = 0x0f;
      *loc = 0x84;
    } else
      *loc = 0x74;
    break;
  case J_JNE_32:
    if (size == 4) {
      loc[-1] = 0x0f;
      *loc = 0x85;
    } else
      *loc = 0x75;
    break;
  case J_JG_32:
    if (size == 4) {
      loc[-1] = 0x0f;
      *loc = 0x8f;
    } else
      *loc = 0x7f;
    break;
  case J_JGE_32:
    if (size == 4) {
      loc[-1] = 0x0f;
      *loc = 0x8d;
    } else
      *loc = 0x7d;
    break;
  case J_JB_32:
    if (size == 4) {
      loc[-1] = 0x0f;
      *loc = 0x82;
    } else
      *loc = 0x72;
    break;
  case J_JBE_32:
    if (size == 4) {
      loc[-1] = 0x0f;
      *loc = 0x86;
    } else
      *loc = 0x76;
    break;
  case J_JL_32:
    if (size == 4) {
      loc[-1] = 0x0f;
      *loc = 0x8c;
    } else
      *loc = 0x7c;
    break;
  case J_JLE_32:
    if (size == 4) {
      loc[-1] = 0x0f;
      *loc = 0x8e;
    } else
      *loc = 0x7e;
    break;
  case J_JA_32:
    if (size == 4) {
      loc[-1] = 0x0f;
      *loc = 0x87;
    } else
      *loc = 0x77;
    break;
  case J_JAE_32:
    if (size == 4) {
      loc[-1] = 0x0f;
      *loc = 0x83;
    } else
      *loc = 0x73;
    break;
  case J_UNKNOWN:
    llvm_unreachable("Unknown Jump Relocation");
  }
}

int64_t X86_64::getImplicitAddend(const uint8_t *buf, RelType type) const {
  switch (type) {
  case R_X86_64_8:
  case R_X86_64_PC8:
    return SignExtend64<8>(*buf);
  case R_X86_64_16:
  case R_X86_64_PC16:
    return SignExtend64<16>(read16le(buf));
  case R_X86_64_32:
  case R_X86_64_32S:
  case R_X86_64_TPOFF32:
  case R_X86_64_GOT32:
  case R_X86_64_GOTPC32:
  case R_X86_64_GOTPC32_TLSDESC:
  case R_X86_64_GOTPCREL:
  case R_X86_64_GOTPCRELX:
  case R_X86_64_REX_GOTPCRELX:
  case R_X86_64_PC32:
  case R_X86_64_GOTTPOFF:
  case R_X86_64_PLT32:
  case R_X86_64_TLSGD:
  case R_X86_64_TLSLD:
  case R_X86_64_DTPOFF32:
  case R_X86_64_SIZE32:
    return SignExtend64<32>(read32le(buf));
  case R_X86_64_64:
  case R_X86_64_TPOFF64:
  case R_X86_64_DTPOFF64:
  case R_X86_64_DTPMOD64:
  case R_X86_64_PC64:
  case R_X86_64_SIZE64:
  case R_X86_64_GLOB_DAT:
  case R_X86_64_GOT64:
  case R_X86_64_GOTOFF64:
  case R_X86_64_GOTPC64:
  case R_X86_64_PLTOFF64:
  case R_X86_64_IRELATIVE:
  case R_X86_64_RELATIVE:
    return read64le(buf);
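  // A TLSDESC GOT slot spans 16 bytes; the addend is stored in the second
  // 64-bit word (see the matching write in relocate() below).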
  case R_X86_64_TLSDESC:
    return read64le(buf + 8);
  case R_X86_64_JUMP_SLOT:
  case R_X86_64_NONE:
    // These relocations are defined as not having an implicit addend.
    return 0;
  default:
    internalLinkerError(getErrorLocation(buf),
                        "cannot read addend for relocation " + toString(type));
    return 0;
  }
}

static void relaxGot(uint8_t *loc, const Relocation &rel, uint64_t val);

void X86_64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
  switch (rel.type) {
  case R_X86_64_8:
    checkIntUInt(loc, val, 8, rel);
    *loc = val;
    break;
  case R_X86_64_PC8:
    checkInt(loc, val, 8, rel);
    *loc = val;
    break;
  case R_X86_64_16:
    checkIntUInt(loc, val, 16, rel);
    write16le(loc, val);
    break;
  case R_X86_64_PC16:
    checkInt(loc, val, 16, rel);
    write16le(loc, val);
    break;
  case R_X86_64_32:
    checkUInt(loc, val, 32, rel);
    write32le(loc, val);
    break;
  case R_X86_64_32S:
  case R_X86_64_GOT32:
  case R_X86_64_GOTPC32:
  case R_X86_64_GOTPCREL:
  case R_X86_64_PC32:
  case R_X86_64_PLT32:
  case R_X86_64_DTPOFF32:
  case R_X86_64_SIZE32:
    checkInt(loc, val, 32, rel);
    write32le(loc, val);
    break;
  case R_X86_64_64:
  case R_X86_64_DTPOFF64:
  case R_X86_64_PC64:
  case R_X86_64_SIZE64:
  case R_X86_64_GOT64:
  case R_X86_64_GOTOFF64:
  case R_X86_64_GOTPC64:
  case R_X86_64_PLTOFF64:
    write64le(loc, val);
    break;
  case R_X86_64_GOTPCRELX:
  case R_X86_64_REX_GOTPCRELX:
    if (rel.expr != R_GOT_PC) {
      relaxGot(loc, rel, val);
    } else {
      checkInt(loc, val, 32, rel);
      write32le(loc, val);
    }
    break;
  case R_X86_64_GOTPC32_TLSDESC:
  case R_X86_64_TLSDESC_CALL:
  case R_X86_64_TLSGD:
    if (rel.expr == R_RELAX_TLS_GD_TO_LE) {
      relaxTlsGdToLe(loc, rel, val);
    } else if (rel.expr == R_RELAX_TLS_GD_TO_IE) {
      relaxTlsGdToIe(loc, rel, val);
    } else {
      checkInt(loc, val, 32, rel);
      write32le(loc, val);
    }
    break;
  case R_X86_64_TLSLD:
    if (rel.expr == R_RELAX_TLS_LD_TO_LE) {
      relaxTlsLdToLe(loc, rel, val);
    } else {
      checkInt(loc, val, 32, rel);
      write32le(loc, val);
    }
    break;
  case R_X86_64_GOTTPOFF:
    if (rel.expr == R_RELAX_TLS_IE_TO_LE) {
      relaxTlsIeToLe(loc, rel, val);
    } else {
      checkInt(loc, val, 32, rel);
      write32le(loc, val);
    }
    break;
  case R_X86_64_TPOFF32:
    checkInt(loc, val, 32, rel);
    write32le(loc, val);
    break;

  case R_X86_64_TLSDESC:
    // The addend is stored in the second 64-bit word.
    write64le(loc + 8, val);
    break;
  default:
    llvm_unreachable("unknown relocation");
  }
}

RelExpr X86_64::adjustGotPcExpr(RelType type, int64_t addend,
                                const uint8_t *loc) const {
  // Only R_X86_64_[REX_]GOTPCRELX can be relaxed. GNU as may emit GOTPCRELX
  // with addend != -4. Such an instruction does not load the full GOT entry,
  // so we cannot relax the relocation. E.g. movl x@GOTPCREL+4(%rip), %rax
  // (addend=0) loads the high 32 bits of the GOT entry.
  if (!config->relax || addend != -4 ||
      (type != R_X86_64_GOTPCRELX && type != R_X86_64_REX_GOTPCRELX))
    return R_GOT_PC;
  const uint8_t op = loc[-2];
  const uint8_t modRm = loc[-1];

  // FIXME: When PIC is disabled and foo is defined locally in the
  // lower 32 bit address space, the memory operand in mov can be converted
  // into an immediate operand. Otherwise, mov must be changed to lea. We
  // support only the latter relaxation at this moment.
  if (op == 0x8b)
    return R_RELAX_GOT_PC;

  // Relax call and jmp.
  if (op == 0xff && (modRm == 0x15 || modRm == 0x25))
    return R_RELAX_GOT_PC;

  // We don't support test/binop instructions without a REX prefix.
  if (type == R_X86_64_GOTPCRELX)
    return R_GOT_PC;

  // Relaxation of test, adc, add, and, cmp, or, sbb, sub, xor.
  // If PIC then no relaxation is available.
  return config->isPic ? R_GOT_PC : R_RELAX_GOT_PC_NOPIC;
}

// A subset of relaxations can only be applied for no-PIC. This method
// handles such relaxations. Instruction encoding information was taken from:
// "Intel 64 and IA-32 Architectures Software Developer's Manual V2"
// (http://www.intel.com/content/dam/www/public/us/en/documents/manuals/
//  64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf)
static void relaxGotNoPic(uint8_t *loc, uint64_t val, uint8_t op,
                          uint8_t modRm) {
  const uint8_t rex = loc[-3];
  // Convert "test %reg, foo@GOTPCREL(%rip)" to "test $foo, %reg".
  if (op == 0x85) {
    // See "TEST-Logical Compare" (4-428 Vol. 2B);
    // TEST r/m64, r64 uses the "full" ModR/M byte (no opcode extension).

    // The ModR/M byte has the form XX YYY ZZZ, where
    // YYY is MODRM.reg (register 2) and ZZZ is MODRM.rm (register 1).
    // XX has different meanings:
    // 00: The operand's memory address is in reg1.
    // 01: The operand's memory address is reg1 + a byte-sized displacement.
    // 10: The operand's memory address is reg1 + a dword-sized (32-bit)
    //     displacement.
    // 11: The operand is reg1 itself.
    // If an instruction requires only one operand, the unused reg2 field
    // holds extra opcode bits rather than a register code.
    // 0xC0 == 11 000 000 binary.
    // 0x38 == 00 111 000 binary.
    // We transfer reg2 to reg1 here as the operand.
    // See "2.1.3 ModR/M and SIB Bytes" (Vol. 2A 2-3).
    loc[-1] = 0xc0 | (modRm & 0x38) >> 3; // ModR/M byte.

    // Change the opcode from TEST r/m64, r64 to TEST r/m64, imm32.
    // See "TEST-Logical Compare" (4-428 Vol. 2B).
    loc[-2] = 0xf7;

    // Move the R bit to the B bit in the REX byte.
    // The REX byte is encoded as 0100WRXB, where
    // 0100 is a 4-bit fixed pattern.
    // REX.W When 1, a 64-bit operand size is used. Otherwise, when 0, the
    //   default operand size is used (which is 32-bit for most but not all
    //   instructions).
    // REX.R This 1-bit value is an extension to the MODRM.reg field.
    // REX.X This 1-bit value is an extension to the SIB.index field.
    // REX.B This 1-bit value is an extension to the MODRM.rm field or the
    //   SIB.base field.
    // See "2.2.1.2 More on REX Prefix Fields" (2-8 Vol. 2A).
    loc[-3] = (rex & ~0x4) | (rex & 0x4) >> 2;
    write32le(loc, val);
    return;
  }

  // If we are here then we need to relax the adc, add, and, cmp, or, sbb,
  // sub or xor operations.

  // Convert "binop foo@GOTPCREL(%rip), %reg" to "binop $foo, %reg".
  // The logic is close to that for the test instruction above, but we also
  // write the opcode extension here; see below for details.
  loc[-1] = 0xc0 | (modRm & 0x38) >> 3 | (op & 0x3c); // ModR/M byte.

  // The primary opcode is 0x81 and the opcode extension is one of:
  // 000b = ADD, 001b is OR, 010b is ADC, 011b is SBB,
  // 100b is AND, 101b is SUB, 110b is XOR, 111b is CMP.
  // This value was written to MODRM.reg in the line above.
  // See "3.2 INSTRUCTIONS (A-M)" (Vol. 2A 3-15) and
  // "INSTRUCTION SET REFERENCE, N-Z" (Vol. 2B 4-1) for
  // descriptions of each operation.
  loc[-2] = 0x81;
  loc[-3] = (rex & ~0x4) | (rex & 0x4) >> 2;
  write32le(loc, val);
}

static void relaxGot(uint8_t *loc, const Relocation &rel, uint64_t val) {
  checkInt(loc, val, 32, rel);
  const uint8_t op = loc[-2];
  const uint8_t modRm = loc[-1];

  // Convert "mov foo@GOTPCREL(%rip),%reg" to "lea foo(%rip),%reg".
  if (op == 0x8b) {
    loc[-2] = 0x8d;
    write32le(loc, val);
    return;
  }

  if (op != 0xff) {
    // We are relaxing a RIP-relative reference to an absolute one, so
    // compensate for the -4 the old addend had.
    assert(!config->isPic);
    relaxGotNoPic(loc, val + 4, op, modRm);
    return;
  }

  // Convert call/jmp instructions.
  if (modRm == 0x15) {
    // The ABI says we can convert "call *foo@GOTPCREL(%rip)" to
    // "nop; call foo". Instead we convert to "addr32 call foo" where addr32
    // is an instruction prefix. That keeps the result a single instruction.
    loc[-2] = 0x67; // addr32 prefix
    loc[-1] = 0xe8; // call
    write32le(loc, val);
    return;
  }

  // Convert "jmp *foo@GOTPCREL(%rip)" to "jmp foo; nop".
  // jmp doesn't return, so the nop is never executed; it is just padding.
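  // The direct jmp (e9) is five bytes, one shorter than the indirect form
  // (ff 25), so the rel32 field moves one byte earlier (loc - 1) and val is
  // adjusted by +1 to rebase it on the new instruction's end; the leftover
  // trailing byte becomes the nop.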
  assert(modRm == 0x25);
  loc[-2] = 0xe9; // jmp
  loc[3] = 0x90;  // nop
  write32le(loc - 1, val + 1);
}

// A split-stack prologue starts by checking the amount of stack remaining
// in one of two ways:
// A) Comparing the stack pointer to a field in the TCB.
// B) Loading a stack pointer offset with an lea into r10 or r11.
bool X86_64::adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end,
                                              uint8_t stOther) const {
  if (!config->is64) {
    error("target doesn't support split stacks");
    return false;
  }

  if (loc + 8 >= end)
    return false;

  // Replace "cmp %fs:0x70,%rsp" and the subsequent branch
  // with "stc, nopl 0x0(%rax,%rax,1)".
  if (memcmp(loc, "\x64\x48\x3b\x24\x25", 5) == 0) {
    memcpy(loc, "\xf9\x0f\x1f\x84\x00\x00\x00\x00", 8);
    return true;
  }

  // Adjust "lea X(%rsp),%rYY" to "lea (X - 0x4000)(%rsp),%rYY" where rYY
  // could be r10 or r11. The lea instruction feeds a subsequent compare which
  // checks whether X bytes of stack space are available. Making X larger
  // effectively reserves that much additional space. The stack grows
  // downward, so we subtract the value.
  if (memcmp(loc, "\x4c\x8d\x94\x24", 4) == 0 ||
      memcmp(loc, "\x4c\x8d\x9c\x24", 4) == 0) {
    // The offset bytes are encoded four bytes after the start of the
    // instruction.
    write32le(loc + 4, read32le(loc + 4) - 0x4000);
    return true;
  }
  return false;
}

void X86_64::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
  uint64_t secAddr = sec.getOutputSection()->addr;
  if (auto *s = dyn_cast<InputSection>(&sec))
    secAddr += s->outSecOff;
  for (const Relocation &rel : sec.relocs()) {
    if (rel.expr == R_NONE) // See deleteFallThruJmpInsn
      continue;
    uint8_t *loc = buf + rel.offset;
    const uint64_t val =
        sec.getRelocTargetVA(sec.file, rel.type, rel.addend,
                             secAddr + rel.offset, *rel.sym, rel.expr);
    relocate(loc, rel, val);
  }
  if (sec.jumpInstrMod) {
    applyJumpInstrMod(buf + sec.jumpInstrMod->offset,
                      sec.jumpInstrMod->original, sec.jumpInstrMod->size);
  }
}

// If Intel Indirect Branch Tracking is enabled, we have to emit special PLT
// entries containing endbr64 instructions. A PLT entry will be split into two
// parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt).
namespace {
class IntelIBT : public X86_64 {
public:
  IntelIBT();
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
  void writeIBTPlt(uint8_t *buf, size_t numEntries) const override;

  static const unsigned IBTPltHeaderSize = 16;
};
} // namespace
IntelIBT::IntelIBT() { pltHeaderSize = 0; }

void IntelIBT::writeGotPlt(uint8_t *buf, const Symbol &s) const {
  uint64_t va =
      in.ibtPlt->getVA() + IBTPltHeaderSize + s.getPltIdx() * pltEntrySize;
  write64le(buf, va);
}

void IntelIBT::writePlt(uint8_t *buf, const Symbol &sym,
                        uint64_t pltEntryAddr) const {
  const uint8_t Inst[] = {
      0xf3, 0x0f, 0x1e, 0xfa,       // endbr64
      0xff, 0x25, 0, 0, 0, 0,       // jmpq *got(%rip)
      0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
  };
  memcpy(buf, Inst, sizeof(Inst));
  write32le(buf + 6, sym.getGotPltVA() - pltEntryAddr - 10);
}

void IntelIBT::writeIBTPlt(uint8_t *buf, size_t numEntries) const {
  writePltHeader(buf);
  buf += IBTPltHeaderSize;

  const uint8_t inst[] = {
      0xf3, 0x0f, 0x1e, 0xfa, // endbr64
      0x68, 0, 0, 0, 0,       // pushq <relocation index>
      0xe9, 0, 0, 0, 0,       // jmpq plt[0]
      0x66, 0x90,             // nop
  };

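  // Entry i starts at IBTPltHeaderSize + sizeof(inst) * i within .plt, and
  // the jmp's rel32 field ends 14 bytes into the entry. Jumping back to
  // plt[0] therefore needs -(16 + 16 * i + 14); since pltHeaderSize is 0
  // here, that is the -30 constant below.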
  for (size_t i = 0; i < numEntries; ++i) {
    memcpy(buf, inst, sizeof(inst));
    write32le(buf + 5, i);
    write32le(buf + 10, -pltHeaderSize - sizeof(inst) * i - 30);
    buf += sizeof(inst);
  }
}

// These nonstandard PLT entries mitigate the Spectre v2 security
// vulnerability. In order to mitigate Spectre v2, we want to avoid indirect
// branch instructions such as `jmp *GOTPLT(%rip)`. So, in the following PLT
// entries, we use a CALL followed by MOV and RET to do the same thing as an
// indirect jump. That instruction sequence is the so-called "retpoline".
//
// We have two types of retpoline PLTs as a size optimization. If `-z now`
// is specified, all dynamic symbols are resolved at load-time. Thus, when
// that option is given, we can omit code for lazy symbol resolution.
namespace {
class Retpoline : public X86_64 {
public:
  Retpoline();
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
};

class RetpolineZNow : public X86_64 {
public:
  RetpolineZNow();
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override {}
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
};
} // namespace

Retpoline::Retpoline() {
  pltHeaderSize = 48;
  pltEntrySize = 32;
  ipltEntrySize = 32;
}

void Retpoline::writeGotPlt(uint8_t *buf, const Symbol &s) const {
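  // Initially point at the pushq at offset 0x15 (21) of this symbol's PLT
  // entry, the start of the lazy-binding path (see writePlt below).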
  write64le(buf, s.getPltVA() + 21);
}

void Retpoline::writePltHeader(uint8_t *buf) const {
  const uint8_t insn[] = {
      0xff, 0x35, 0, 0, 0, 0,                   // 0:  pushq GOTPLT+8(%rip)
      0x4c, 0x8b, 0x1d, 0, 0, 0, 0,             // 6:  mov GOTPLT+16(%rip), %r11
      0xe8, 0x0e, 0x00, 0x00, 0x00,             // d:  callq next
      0xf3, 0x90,                               // 12: loop: pause
      0x0f, 0xae, 0xe8,                         // 14: lfence
      0xeb, 0xf9,                               // 17: jmp loop
      0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19: int3; .align 16
      0x4c, 0x89, 0x1c, 0x24,                   // 20: next: mov %r11, (%rsp)
      0xc3,                                     // 24: ret
      0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 25: int3; padding
      0xcc, 0xcc, 0xcc, 0xcc,                   // 2c: int3; padding
  };
  memcpy(buf, insn, sizeof(insn));

  uint64_t gotPlt = in.gotPlt->getVA();
  uint64_t plt = in.plt->getVA();
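  // As in X86_64::writePltHeader, each displacement is relative to the end
  // of its instruction: the pushq ends at plt + 6 and the mov at plt + 13.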
  write32le(buf + 2, gotPlt - plt - 6 + 8);
  write32le(buf + 9, gotPlt - plt - 13 + 16);
}

void Retpoline::writePlt(uint8_t *buf, const Symbol &sym,
                         uint64_t pltEntryAddr) const {
  const uint8_t insn[] = {
      0xf3, 0x0f, 0x1e, 0xfa,       // 0:  endbr64
      0x4c, 0x8b, 0x1d, 0, 0, 0, 0, // 4:  mov foo@GOTPLT(%rip), %r11
      0xe8, 0, 0, 0, 0,             // b:  callq plt+0x20
      0xe9, 0, 0, 0, 0,             // 10: jmp plt+0x12
      0x68, 0, 0, 0, 0,             // 15: pushq <relocation index>
      0xe9, 0, 0, 0, 0,             // 1a: jmp plt+0
      0xcc,                         // 1f: int3; padding
  };
  memcpy(buf, insn, sizeof(insn));

  uint64_t off = pltEntryAddr - in.plt->getVA();

  write32le(buf + 7, sym.getGotPltVA() - pltEntryAddr - 11);
  write32le(buf + 12, -off - 16 + 32);
  write32le(buf + 17, -off - 21 + 18);
  write32le(buf + 22, sym.getPltIdx());
  write32le(buf + 27, -off - 31);
}

RetpolineZNow::RetpolineZNow() {
  pltHeaderSize = 32;
  pltEntrySize = 16;
  ipltEntrySize = 16;
}

void RetpolineZNow::writePltHeader(uint8_t *buf) const {
  const uint8_t insn[] = {
      0xe8, 0x0b, 0x00, 0x00, 0x00, // 0:  call next
      0xf3, 0x90,                   // 5:  loop: pause
      0x0f, 0xae, 0xe8,             // 7:  lfence
      0xeb, 0xf9,                   // a:  jmp loop
      0xcc, 0xcc, 0xcc, 0xcc,       // c:  int3; .align 16
      0x4c, 0x89, 0x1c, 0x24,       // 10: next: mov %r11, (%rsp)
      0xc3,                         // 14: ret
      0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 15: int3; padding
      0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1a: int3; padding
      0xcc,                         // 1f: int3; padding
  };
  memcpy(buf, insn, sizeof(insn));
}

void RetpolineZNow::writePlt(uint8_t *buf, const Symbol &sym,
                             uint64_t pltEntryAddr) const {
  const uint8_t insn[] = {
      0xf3, 0x0f, 0x1e, 0xfa,       // 0: endbr64
      0x4c, 0x8b, 0x1d, 0, 0, 0, 0, // 4: mov foo@GOTPLT(%rip), %r11
      0xe9, 0, 0, 0, 0,             // b: jmp plt+0
  };
  memcpy(buf, insn, sizeof(insn));

  write32le(buf + 7, sym.getGotPltVA() - pltEntryAddr - 11);
  write32le(buf + 12, in.plt->getVA() - pltEntryAddr - 16);
}

static TargetInfo *getTargetInfo() {
  if (config->zRetpolineplt) {
    if (config->zNow) {
      static RetpolineZNow t;
      return &t;
    }
    static Retpoline t;
    return &t;
  }

#ifdef __OpenBSD__
  static IntelIBT t;
  return &t;
#else
  if (config->andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT) {
    static IntelIBT t;
    return &t;
  }

  static X86_64 t;
  return &t;
#endif
}

TargetInfo *elf::getX86_64TargetInfo() { return getTargetInfo(); }