1 //===- X86_64.cpp ---------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "InputFiles.h"
10 #include "OutputSections.h"
11 #include "Symbols.h"
12 #include "SyntheticSections.h"
13 #include "Target.h"
14 #include "lld/Common/ErrorHandler.h"
15 #include "llvm/Object/ELF.h"
16 #include "llvm/Support/Endian.h"
17
18 using namespace llvm;
19 using namespace llvm::object;
20 using namespace llvm::support::endian;
21 using namespace llvm::ELF;
22 using namespace lld;
23 using namespace lld::elf;
24
namespace {
// x86-64 ELF target hooks. Implements the TargetInfo callbacks that emit
// GOT/PLT entries, apply relocations, and perform the GOT and TLS
// relaxations used by this backend, plus the basic-block-sections jump
// relaxation (deleteFallThruJmpInsn / applyJumpInstrMod).
class X86_64 : public TargetInfo {
public:
  X86_64();
  // Number of relocations consumed when a TLS GD/LD sequence is relaxed.
  int getTlsGdRelaxSkip(RelType type) const override;
  RelExpr getRelExpr(RelType type, const Symbol &s,
                     const uint8_t *loc) const override;
  RelType getDynRel(RelType type) const override;
  // .got.plt and PLT entry writers.
  void writeGotPltHeader(uint8_t *buf) const override;
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
  void relocate(uint8_t *loc, const Relocation &rel,
                uint64_t val) const override;
  int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
  // Rewrites a jump opcode in place (basic-block-sections relaxation).
  void applyJumpInstrMod(uint8_t *loc, JumpModType type,
                         unsigned size) const override;

  // GOT/TLS relaxations: each rewrites the instruction bytes around loc.
  RelExpr adjustGotPcExpr(RelType type, int64_t addend,
                          const uint8_t *loc) const override;
  void relaxGot(uint8_t *loc, const Relocation &rel,
                uint64_t val) const override;
  void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
                      uint64_t val) const override;
  void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
                      uint64_t val) const override;
  void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
                      uint64_t val) const override;
  void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel,
                      uint64_t val) const override;
  bool adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end,
                                        uint8_t stOther) const override;
  // Drops a redundant jump to the immediately-following section.
  bool deleteFallThruJmpInsn(InputSection &is, InputFile *file,
                             InputSection *nextIS) const override;
};
} // namespace
63
// This is vector of NOP instructions of sizes from 1 to 9 bytes. The
// appropriately sized instructions are used to fill the gaps between sections
// which are executed during fall through.
static const std::vector<std::vector<uint8_t>> nopInstructions = {
    {0x90},
    {0x66, 0x90},
    {0x0f, 0x1f, 0x00},
    {0x0f, 0x1f, 0x40, 0x00},
    {0x0f, 0x1f, 0x44, 0x00, 0x00},
    {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},
    {0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
    {0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
    {0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}};
77
X86_64::X86_64() {
  // Dynamic relocation types the generic writer emits for this target.
  copyRel = R_X86_64_COPY;
  gotRel = R_X86_64_GLOB_DAT;
  pltRel = R_X86_64_JUMP_SLOT;
  relativeRel = R_X86_64_RELATIVE;
  iRelativeRel = R_X86_64_IRELATIVE;
  symbolicRel = R_X86_64_64;
  // TLS dynamic relocation types.
  tlsDescRel = R_X86_64_TLSDESC;
  tlsGotRel = R_X86_64_TPOFF64;
  tlsModuleIndexRel = R_X86_64_DTPMOD64;
  tlsOffsetRel = R_X86_64_DTPOFF64;
  // GOT/PLT geometry: 8-byte GOT entries, 16-byte PLT header and entries.
  gotBaseSymInGotPlt = true;
  gotEntrySize = 8;
  pltHeaderSize = 16;
  pltEntrySize = 16;
  ipltEntrySize = 16;
  trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3
  nopInstrs = nopInstructions;

  // Align to the large page size (known as a superpage or huge page).
  // FreeBSD automatically promotes large, superpage-aligned allocations.
  defaultImageBase = 0x200000;
}
101
getTlsGdRelaxSkip(RelType type) const102 int X86_64::getTlsGdRelaxSkip(RelType type) const { return 2; }
103
// Opcodes for the different X86_64 jmp instructions.
enum JmpInsnOpcode : uint32_t {
  J_JMP_32,  // unconditional jmp rel32
  J_JNE_32,  // jump if not equal
  J_JE_32,   // jump if equal
  J_JG_32,   // jump if greater (signed)
  J_JGE_32,  // jump if greater or equal (signed)
  J_JB_32,   // jump if below (unsigned)
  J_JBE_32,  // jump if below or equal (unsigned)
  J_JL_32,   // jump if less (signed)
  J_JLE_32,  // jump if less or equal (signed)
  J_JA_32,   // jump if above (unsigned)
  J_JAE_32,  // jump if above or equal (unsigned)
  J_UNKNOWN, // not a recognized jump instruction
};
119
120 // Given the first (optional) and second byte of the insn's opcode, this
121 // returns the corresponding enum value.
getJmpInsnType(const uint8_t * first,const uint8_t * second)122 static JmpInsnOpcode getJmpInsnType(const uint8_t *first,
123 const uint8_t *second) {
124 if (*second == 0xe9)
125 return J_JMP_32;
126
127 if (first == nullptr)
128 return J_UNKNOWN;
129
130 if (*first == 0x0f) {
131 switch (*second) {
132 case 0x84:
133 return J_JE_32;
134 case 0x85:
135 return J_JNE_32;
136 case 0x8f:
137 return J_JG_32;
138 case 0x8d:
139 return J_JGE_32;
140 case 0x82:
141 return J_JB_32;
142 case 0x86:
143 return J_JBE_32;
144 case 0x8c:
145 return J_JL_32;
146 case 0x8e:
147 return J_JLE_32;
148 case 0x87:
149 return J_JA_32;
150 case 0x83:
151 return J_JAE_32;
152 }
153 }
154 return J_UNKNOWN;
155 }
156
157 // Return the relocation index for input section IS with a specific Offset.
158 // Returns the maximum size of the vector if no such relocation is found.
getRelocationWithOffset(const InputSection & is,uint64_t offset)159 static unsigned getRelocationWithOffset(const InputSection &is,
160 uint64_t offset) {
161 unsigned size = is.relocations.size();
162 for (unsigned i = size - 1; i + 1 > 0; --i) {
163 if (is.relocations[i].offset == offset && is.relocations[i].expr != R_NONE)
164 return i;
165 }
166 return size;
167 }
168
169 // Returns true if R corresponds to a relocation used for a jump instruction.
170 // TODO: Once special relocations for relaxable jump instructions are available,
171 // this should be modified to use those relocations.
isRelocationForJmpInsn(Relocation & R)172 static bool isRelocationForJmpInsn(Relocation &R) {
173 return R.type == R_X86_64_PLT32 || R.type == R_X86_64_PC32 ||
174 R.type == R_X86_64_PC8;
175 }
176
177 // Return true if Relocation R points to the first instruction in the
178 // next section.
179 // TODO: Delete this once psABI reserves a new relocation type for fall thru
180 // jumps.
isFallThruRelocation(InputSection & is,InputFile * file,InputSection * nextIS,Relocation & r)181 static bool isFallThruRelocation(InputSection &is, InputFile *file,
182 InputSection *nextIS, Relocation &r) {
183 if (!isRelocationForJmpInsn(r))
184 return false;
185
186 uint64_t addrLoc = is.getOutputSection()->addr + is.outSecOff + r.offset;
187 uint64_t targetOffset = InputSectionBase::getRelocTargetVA(
188 file, r.type, r.addend, addrLoc, *r.sym, r.expr);
189
190 // If this jmp is a fall thru, the target offset is the beginning of the
191 // next section.
192 uint64_t nextSectionOffset =
193 nextIS->getOutputSection()->addr + nextIS->outSecOff;
194 return (addrLoc + 4 + targetOffset) == nextSectionOffset;
195 }
196
197 // Return the jmp instruction opcode that is the inverse of the given
198 // opcode. For example, JE inverted is JNE.
invertJmpOpcode(const JmpInsnOpcode opcode)199 static JmpInsnOpcode invertJmpOpcode(const JmpInsnOpcode opcode) {
200 switch (opcode) {
201 case J_JE_32:
202 return J_JNE_32;
203 case J_JNE_32:
204 return J_JE_32;
205 case J_JG_32:
206 return J_JLE_32;
207 case J_JGE_32:
208 return J_JL_32;
209 case J_JB_32:
210 return J_JAE_32;
211 case J_JBE_32:
212 return J_JA_32;
213 case J_JL_32:
214 return J_JGE_32;
215 case J_JLE_32:
216 return J_JG_32;
217 case J_JA_32:
218 return J_JBE_32;
219 case J_JAE_32:
220 return J_JB_32;
221 default:
222 return J_UNKNOWN;
223 }
224 }
225
226 // Deletes direct jump instruction in input sections that jumps to the
227 // following section as it is not required. If there are two consecutive jump
228 // instructions, it checks if they can be flipped and one can be deleted.
229 // For example:
230 // .section .text
231 // a.BB.foo:
232 // ...
233 // 10: jne aa.BB.foo
234 // 16: jmp bar
235 // aa.BB.foo:
236 // ...
237 //
238 // can be converted to:
239 // a.BB.foo:
240 // ...
241 // 10: je bar #jne flipped to je and the jmp is deleted.
242 // aa.BB.foo:
243 // ...
deleteFallThruJmpInsn(InputSection & is,InputFile * file,InputSection * nextIS) const244 bool X86_64::deleteFallThruJmpInsn(InputSection &is, InputFile *file,
245 InputSection *nextIS) const {
246 const unsigned sizeOfDirectJmpInsn = 5;
247
248 if (nextIS == nullptr)
249 return false;
250
251 if (is.getSize() < sizeOfDirectJmpInsn)
252 return false;
253
254 // If this jmp insn can be removed, it is the last insn and the
255 // relocation is 4 bytes before the end.
256 unsigned rIndex = getRelocationWithOffset(is, is.getSize() - 4);
257 if (rIndex == is.relocations.size())
258 return false;
259
260 Relocation &r = is.relocations[rIndex];
261
262 // Check if the relocation corresponds to a direct jmp.
263 const uint8_t *secContents = is.data().data();
264 // If it is not a direct jmp instruction, there is nothing to do here.
265 if (*(secContents + r.offset - 1) != 0xe9)
266 return false;
267
268 if (isFallThruRelocation(is, file, nextIS, r)) {
269 // This is a fall thru and can be deleted.
270 r.expr = R_NONE;
271 r.offset = 0;
272 is.drop_back(sizeOfDirectJmpInsn);
273 is.nopFiller = true;
274 return true;
275 }
276
277 // Now, check if flip and delete is possible.
278 const unsigned sizeOfJmpCCInsn = 6;
279 // To flip, there must be atleast one JmpCC and one direct jmp.
280 if (is.getSize() < sizeOfDirectJmpInsn + sizeOfJmpCCInsn)
281 return 0;
282
283 unsigned rbIndex =
284 getRelocationWithOffset(is, (is.getSize() - sizeOfDirectJmpInsn - 4));
285 if (rbIndex == is.relocations.size())
286 return 0;
287
288 Relocation &rB = is.relocations[rbIndex];
289
290 const uint8_t *jmpInsnB = secContents + rB.offset - 1;
291 JmpInsnOpcode jmpOpcodeB = getJmpInsnType(jmpInsnB - 1, jmpInsnB);
292 if (jmpOpcodeB == J_UNKNOWN)
293 return false;
294
295 if (!isFallThruRelocation(is, file, nextIS, rB))
296 return false;
297
298 // jmpCC jumps to the fall thru block, the branch can be flipped and the
299 // jmp can be deleted.
300 JmpInsnOpcode jInvert = invertJmpOpcode(jmpOpcodeB);
301 if (jInvert == J_UNKNOWN)
302 return false;
303 is.jumpInstrMods.push_back({jInvert, (rB.offset - 1), 4});
304 // Move R's values to rB except the offset.
305 rB = {r.expr, r.type, rB.offset, r.addend, r.sym};
306 // Cancel R
307 r.expr = R_NONE;
308 r.offset = 0;
309 is.drop_back(sizeOfDirectJmpInsn);
310 is.nopFiller = true;
311 return true;
312 }
313
getRelExpr(RelType type,const Symbol & s,const uint8_t * loc) const314 RelExpr X86_64::getRelExpr(RelType type, const Symbol &s,
315 const uint8_t *loc) const {
316 if (type == R_X86_64_GOTTPOFF)
317 config->hasStaticTlsModel = true;
318
319 switch (type) {
320 case R_X86_64_8:
321 case R_X86_64_16:
322 case R_X86_64_32:
323 case R_X86_64_32S:
324 case R_X86_64_64:
325 return R_ABS;
326 case R_X86_64_DTPOFF32:
327 case R_X86_64_DTPOFF64:
328 return R_DTPREL;
329 case R_X86_64_TPOFF32:
330 return R_TPREL;
331 case R_X86_64_TLSDESC_CALL:
332 return R_TLSDESC_CALL;
333 case R_X86_64_TLSLD:
334 return R_TLSLD_PC;
335 case R_X86_64_TLSGD:
336 return R_TLSGD_PC;
337 case R_X86_64_SIZE32:
338 case R_X86_64_SIZE64:
339 return R_SIZE;
340 case R_X86_64_PLT32:
341 return R_PLT_PC;
342 case R_X86_64_PC8:
343 case R_X86_64_PC16:
344 case R_X86_64_PC32:
345 case R_X86_64_PC64:
346 return R_PC;
347 case R_X86_64_GOT32:
348 case R_X86_64_GOT64:
349 return R_GOTPLT;
350 case R_X86_64_GOTPC32_TLSDESC:
351 return R_TLSDESC_PC;
352 case R_X86_64_GOTPCREL:
353 case R_X86_64_GOTPCRELX:
354 case R_X86_64_REX_GOTPCRELX:
355 case R_X86_64_GOTTPOFF:
356 return R_GOT_PC;
357 case R_X86_64_GOTOFF64:
358 return R_GOTPLTREL;
359 case R_X86_64_GOTPC32:
360 case R_X86_64_GOTPC64:
361 return R_GOTPLTONLY_PC;
362 case R_X86_64_NONE:
363 return R_NONE;
364 default:
365 error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
366 ") against symbol " + toString(s));
367 return R_NONE;
368 }
369 }
370
writeGotPltHeader(uint8_t * buf) const371 void X86_64::writeGotPltHeader(uint8_t *buf) const {
372 // The first entry holds the value of _DYNAMIC. It is not clear why that is
373 // required, but it is documented in the psabi and the glibc dynamic linker
374 // seems to use it (note that this is relevant for linking ld.so, not any
375 // other program).
376 write64le(buf, mainPart->dynamic->getVA());
377 }
378
writeGotPlt(uint8_t * buf,const Symbol & s) const379 void X86_64::writeGotPlt(uint8_t *buf, const Symbol &s) const {
380 // See comments in X86::writeGotPlt.
381 write64le(buf, s.getPltVA() + 6);
382 }
383
writeIgotPlt(uint8_t * buf,const Symbol & s) const384 void X86_64::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
385 // An x86 entry is the address of the ifunc resolver function (for -z rel).
386 if (config->writeAddends)
387 write64le(buf, s.getVA());
388 }
389
writePltHeader(uint8_t * buf) const390 void X86_64::writePltHeader(uint8_t *buf) const {
391 const uint8_t pltData[] = {
392 0xff, 0x35, 0, 0, 0, 0, // pushq GOTPLT+8(%rip)
393 0xff, 0x25, 0, 0, 0, 0, // jmp *GOTPLT+16(%rip)
394 0x0f, 0x1f, 0x40, 0x00, // nop
395 };
396 memcpy(buf, pltData, sizeof(pltData));
397 uint64_t gotPlt = in.gotPlt->getVA();
398 uint64_t plt = in.ibtPlt ? in.ibtPlt->getVA() : in.plt->getVA();
399 write32le(buf + 2, gotPlt - plt + 2); // GOTPLT+8
400 write32le(buf + 8, gotPlt - plt + 4); // GOTPLT+16
401 }
402
writePlt(uint8_t * buf,const Symbol & sym,uint64_t pltEntryAddr) const403 void X86_64::writePlt(uint8_t *buf, const Symbol &sym,
404 uint64_t pltEntryAddr) const {
405 const uint8_t inst[] = {
406 0xff, 0x25, 0, 0, 0, 0, // jmpq *got(%rip)
407 0x68, 0, 0, 0, 0, // pushq <relocation index>
408 0xe9, 0, 0, 0, 0, // jmpq plt[0]
409 };
410 memcpy(buf, inst, sizeof(inst));
411
412 write32le(buf + 2, sym.getGotPltVA() - pltEntryAddr - 6);
413 write32le(buf + 7, sym.pltIndex);
414 write32le(buf + 12, in.plt->getVA() - pltEntryAddr - 16);
415 }
416
getDynRel(RelType type) const417 RelType X86_64::getDynRel(RelType type) const {
418 if (type == R_X86_64_64 || type == R_X86_64_PC64 || type == R_X86_64_SIZE32 ||
419 type == R_X86_64_SIZE64)
420 return type;
421 return R_X86_64_NONE;
422 }
423
// Relax a general-dynamic TLS access (TLSGD or TLSDESC form) into a
// local-exec access that reads the thread pointer from %fs directly.
void X86_64::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
                            uint64_t val) const {
  if (rel.type == R_X86_64_TLSGD) {
    // Convert
    //   .byte 0x66
    //   leaq x@tlsgd(%rip), %rdi
    //   .word 0x6666
    //   rex64
    //   call __tls_get_addr@plt
    // to the following two instructions.
    const uint8_t inst[] = {
        0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00,
        0x00, 0x00, // mov %fs:0x0,%rax
        0x48, 0x8d, 0x80, 0, 0, 0, 0, // lea x@tpoff,%rax
    };
    // loc points at the relocated field; the sequence starts 4 bytes earlier.
    memcpy(loc - 4, inst, sizeof(inst));

    // The original code used a pc relative relocation and so we have to
    // compensate for the -4 it had in the addend.
    write32le(loc + 8, val + 4);
  } else {
    // Convert
    //   lea x@tlsgd(%rip), %rax
    //   call *(%rax)
    // to the following two instructions.
    assert(rel.type == R_X86_64_GOTPC32_TLSDESC);
    // The lea must be the REX.W form 48 8d 05 (lea ...(%rip),%rax).
    if (memcmp(loc - 3, "\x48\x8d\x05", 3)) {
      error(getErrorLocation(loc - 3) + "R_X86_64_GOTPC32_TLSDESC must be used "
                                        "in callq *x@tlsdesc(%rip), %rax");
      return;
    }
    // movq $x@tpoff(%rip),%rax
    loc[-2] = 0xc7;
    loc[-1] = 0xc0;
    write32le(loc, val + 4);
    // xchg ax,ax -- two-byte nop padding out the deleted indirect call.
    loc[4] = 0x66;
    loc[5] = 0x90;
  }
}
464
// Relax a general-dynamic TLS access (TLSGD or TLSDESC form) into an
// initial-exec access that loads the offset from the GOT.
void X86_64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
                            uint64_t val) const {
  if (rel.type == R_X86_64_TLSGD) {
    // Convert
    //   .byte 0x66
    //   leaq x@tlsgd(%rip), %rdi
    //   .word 0x6666
    //   rex64
    //   call __tls_get_addr@plt
    // to the following two instructions.
    const uint8_t inst[] = {
        0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00,
        0x00, 0x00, // mov %fs:0x0,%rax
        0x48, 0x03, 0x05, 0, 0, 0, 0, // addq x@gottpoff(%rip),%rax
    };
    // loc points at the relocated field; the sequence starts 4 bytes earlier.
    memcpy(loc - 4, inst, sizeof(inst));

    // Both code sequences are PC relatives, but since we are moving the
    // constant forward by 8 bytes we have to subtract the value by 8.
    write32le(loc + 8, val - 8);
  } else {
    // Convert
    //   lea x@tlsgd(%rip), %rax
    //   call *(%rax)
    // to the following two instructions.
    assert(rel.type == R_X86_64_GOTPC32_TLSDESC);
    // The lea must be the REX.W form 48 8d 05 (lea ...(%rip),%rax).
    if (memcmp(loc - 3, "\x48\x8d\x05", 3)) {
      error(getErrorLocation(loc - 3) + "R_X86_64_GOTPC32_TLSDESC must be used "
                                        "in callq *x@tlsdesc(%rip), %rax");
      return;
    }
    // movq x@gottpoff(%rip),%rax -- only the opcode byte changes (8d -> 8b).
    loc[-2] = 0x8b;
    write32le(loc, val);
    // xchg ax,ax -- two-byte nop padding out the deleted indirect call.
    loc[4] = 0x66;
    loc[5] = 0x90;
  }
}
504
// In some conditions, R_X86_64_GOTTPOFF relocation can be optimized to
// R_X86_64_TPOFF32 so that it does not use GOT.
void X86_64::relaxTlsIeToLe(uint8_t *loc, const Relocation &,
                            uint64_t val) const {
  // loc points at the 4-byte displacement; the instruction starts 3 bytes
  // earlier (REX prefix + opcode + ModRM).
  uint8_t *inst = loc - 3;
  // MODRM.reg field (bits 5:3) names the destination register.
  uint8_t reg = loc[-1] >> 3;
  uint8_t *regSlot = loc - 1;

  // Note that ADD with RSP or R12 is converted to ADD instead of LEA
  // because LEA with these registers needs 4 bytes to encode and thus
  // wouldn't fit the space.

  if (memcmp(inst, "\x48\x03\x25", 3) == 0) {
    // "addq foo@gottpoff(%rip),%rsp" -> "addq $foo,%rsp"
    memcpy(inst, "\x48\x81\xc4", 3);
  } else if (memcmp(inst, "\x4c\x03\x25", 3) == 0) {
    // "addq foo@gottpoff(%rip),%r12" -> "addq $foo,%r12"
    memcpy(inst, "\x49\x81\xc4", 3);
  } else if (memcmp(inst, "\x4c\x03", 2) == 0) {
    // "addq foo@gottpoff(%rip),%r[8-15]" -> "leaq foo(%r[8-15]),%r[8-15]"
    memcpy(inst, "\x4d\x8d", 2);
    // ModRM: mod=10 (disp32), base and dest both = reg.
    *regSlot = 0x80 | (reg << 3) | reg;
  } else if (memcmp(inst, "\x48\x03", 2) == 0) {
    // "addq foo@gottpoff(%rip),%reg -> "leaq foo(%reg),%reg"
    memcpy(inst, "\x48\x8d", 2);
    *regSlot = 0x80 | (reg << 3) | reg;
  } else if (memcmp(inst, "\x4c\x8b", 2) == 0) {
    // "movq foo@gottpoff(%rip),%r[8-15]" -> "movq $foo,%r[8-15]"
    memcpy(inst, "\x49\xc7", 2);
    // ModRM: mod=11 (register direct), rm = reg.
    *regSlot = 0xc0 | reg;
  } else if (memcmp(inst, "\x48\x8b", 2) == 0) {
    // "movq foo@gottpoff(%rip),%reg" -> "movq $foo,%reg"
    memcpy(inst, "\x48\xc7", 2);
    *regSlot = 0xc0 | reg;
  } else {
    error(getErrorLocation(loc - 3) +
          "R_X86_64_GOTTPOFF must be used in MOVQ or ADDQ instructions only");
  }

  // The original code used a PC relative relocation.
  // Need to compensate for the -4 it had in the addend.
  // NOTE(review): this write also runs on the error path above; that looks
  // harmless since an error was already reported -- confirm intended.
  write32le(loc, val + 4);
}
548
// Relax a local-dynamic TLS sequence to local-exec: the __tls_get_addr call
// is replaced by a direct read of the thread pointer, and the DTPOFF
// relocations that addressed its result become plain offsets.
void X86_64::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel,
                            uint64_t val) const {
  // DTPOFF references just get the resolved offset written in place.
  if (rel.type == R_X86_64_DTPOFF64) {
    write64le(loc, val);
    return;
  }
  if (rel.type == R_X86_64_DTPOFF32) {
    write32le(loc, val);
    return;
  }

  // Replacement sequence: prefix padding plus "mov %fs:0,%rax".
  const uint8_t inst[] = {
      0x66, 0x66, // .word 0x6666
      0x66,       // .byte 0x66
      0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:0,%rax
  };

  if (loc[4] == 0xe8) {
    // Convert
    //   leaq bar@tlsld(%rip), %rdi           # 48 8d 3d <Loc>
    //   callq __tls_get_addr@PLT             # e8 <disp32>
    //   leaq bar@dtpoff(%rax), %rcx
    // to
    //   .word 0x6666
    //   .byte 0x66
    //   mov %fs:0,%rax
    //   leaq bar@tpoff(%rax), %rcx
    memcpy(loc - 3, inst, sizeof(inst));
    return;
  }

  if (loc[4] == 0xff && loc[5] == 0x15) {
    // Convert
    //   leaq x@tlsld(%rip),%rdi               # 48 8d 3d <Loc>
    //   call *__tls_get_addr@GOTPCREL(%rip)   # ff 15 <disp32>
    // to
    //   .long 0x66666666
    //   movq %fs:0,%rax
    // See "Table 11.9: LD -> LE Code Transition (LP64)" in
    // https://raw.githubusercontent.com/wiki/hjl-tools/x86-psABI/x86-64-psABI-1.0.pdf
    // The indirect call is one byte longer than the direct one, so the
    // replacement starts one byte earlier with an extra 0x66.
    loc[-3] = 0x66;
    memcpy(loc - 2, inst, sizeof(inst));
    return;
  }

  error(getErrorLocation(loc - 3) +
        "expected R_X86_64_PLT32 or R_X86_64_GOTPCRELX after R_X86_64_TLSLD");
}
597
598 // A JumpInstrMod at a specific offset indicates that the jump instruction
599 // opcode at that offset must be modified. This is specifically used to relax
600 // jump instructions with basic block sections. This function looks at the
601 // JumpMod and effects the change.
applyJumpInstrMod(uint8_t * loc,JumpModType type,unsigned size) const602 void X86_64::applyJumpInstrMod(uint8_t *loc, JumpModType type,
603 unsigned size) const {
604 switch (type) {
605 case J_JMP_32:
606 if (size == 4)
607 *loc = 0xe9;
608 else
609 *loc = 0xeb;
610 break;
611 case J_JE_32:
612 if (size == 4) {
613 loc[-1] = 0x0f;
614 *loc = 0x84;
615 } else
616 *loc = 0x74;
617 break;
618 case J_JNE_32:
619 if (size == 4) {
620 loc[-1] = 0x0f;
621 *loc = 0x85;
622 } else
623 *loc = 0x75;
624 break;
625 case J_JG_32:
626 if (size == 4) {
627 loc[-1] = 0x0f;
628 *loc = 0x8f;
629 } else
630 *loc = 0x7f;
631 break;
632 case J_JGE_32:
633 if (size == 4) {
634 loc[-1] = 0x0f;
635 *loc = 0x8d;
636 } else
637 *loc = 0x7d;
638 break;
639 case J_JB_32:
640 if (size == 4) {
641 loc[-1] = 0x0f;
642 *loc = 0x82;
643 } else
644 *loc = 0x72;
645 break;
646 case J_JBE_32:
647 if (size == 4) {
648 loc[-1] = 0x0f;
649 *loc = 0x86;
650 } else
651 *loc = 0x76;
652 break;
653 case J_JL_32:
654 if (size == 4) {
655 loc[-1] = 0x0f;
656 *loc = 0x8c;
657 } else
658 *loc = 0x7c;
659 break;
660 case J_JLE_32:
661 if (size == 4) {
662 loc[-1] = 0x0f;
663 *loc = 0x8e;
664 } else
665 *loc = 0x7e;
666 break;
667 case J_JA_32:
668 if (size == 4) {
669 loc[-1] = 0x0f;
670 *loc = 0x87;
671 } else
672 *loc = 0x77;
673 break;
674 case J_JAE_32:
675 if (size == 4) {
676 loc[-1] = 0x0f;
677 *loc = 0x83;
678 } else
679 *loc = 0x73;
680 break;
681 case J_UNKNOWN:
682 llvm_unreachable("Unknown Jump Relocation");
683 }
684 }
685
// Read the addend implicitly encoded in the relocated field at buf for the
// given relocation type, sign-extending sub-64-bit fields to 64 bits.
int64_t X86_64::getImplicitAddend(const uint8_t *buf, RelType type) const {
  switch (type) {
  // 8-bit fields.
  case R_X86_64_8:
  case R_X86_64_PC8:
    return SignExtend64<8>(*buf);
  // 16-bit fields.
  case R_X86_64_16:
  case R_X86_64_PC16:
    return SignExtend64<16>(read16le(buf));
  // 32-bit fields.
  case R_X86_64_32:
  case R_X86_64_32S:
  case R_X86_64_TPOFF32:
  case R_X86_64_GOT32:
  case R_X86_64_GOTPC32:
  case R_X86_64_GOTPC32_TLSDESC:
  case R_X86_64_GOTPCREL:
  case R_X86_64_GOTPCRELX:
  case R_X86_64_REX_GOTPCRELX:
  case R_X86_64_PC32:
  case R_X86_64_GOTTPOFF:
  case R_X86_64_PLT32:
  case R_X86_64_TLSGD:
  case R_X86_64_TLSLD:
  case R_X86_64_DTPOFF32:
  case R_X86_64_SIZE32:
    return SignExtend64<32>(read32le(buf));
  // 64-bit fields.
  case R_X86_64_64:
  case R_X86_64_TPOFF64:
  case R_X86_64_DTPOFF64:
  case R_X86_64_DTPMOD64:
  case R_X86_64_PC64:
  case R_X86_64_SIZE64:
  case R_X86_64_GLOB_DAT:
  case R_X86_64_GOT64:
  case R_X86_64_GOTOFF64:
  case R_X86_64_GOTPC64:
  case R_X86_64_IRELATIVE:
  case R_X86_64_RELATIVE:
    return read64le(buf);
  case R_X86_64_JUMP_SLOT:
  case R_X86_64_NONE:
    // These relocations are defined as not having an implicit addend.
    return 0;
  default:
    internalLinkerError(getErrorLocation(buf),
                        "cannot read addend for relocation " + toString(type));
    return 0;
  }
}
734
// Apply the computed value val to the field at loc for relocation rel,
// range-checking sub-64-bit fields first.
void X86_64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
  switch (rel.type) {
  // 8-bit: absolute may be signed or unsigned, PC-relative must be signed.
  case R_X86_64_8:
    checkIntUInt(loc, val, 8, rel);
    *loc = val;
    break;
  case R_X86_64_PC8:
    checkInt(loc, val, 8, rel);
    *loc = val;
    break;
  // 16-bit, same signedness split.
  case R_X86_64_16:
    checkIntUInt(loc, val, 16, rel);
    write16le(loc, val);
    break;
  case R_X86_64_PC16:
    checkInt(loc, val, 16, rel);
    write16le(loc, val);
    break;
  // R_X86_64_32 is zero-extended, so unsigned check only.
  case R_X86_64_32:
    checkUInt(loc, val, 32, rel);
    write32le(loc, val);
    break;
  // All remaining 32-bit fields are sign-extended.
  case R_X86_64_32S:
  case R_X86_64_TPOFF32:
  case R_X86_64_GOT32:
  case R_X86_64_GOTPC32:
  case R_X86_64_GOTPC32_TLSDESC:
  case R_X86_64_GOTPCREL:
  case R_X86_64_GOTPCRELX:
  case R_X86_64_REX_GOTPCRELX:
  case R_X86_64_PC32:
  case R_X86_64_GOTTPOFF:
  case R_X86_64_PLT32:
  case R_X86_64_TLSGD:
  case R_X86_64_TLSLD:
  case R_X86_64_DTPOFF32:
  case R_X86_64_SIZE32:
    checkInt(loc, val, 32, rel);
    write32le(loc, val);
    break;
  // 64-bit fields need no range check.
  case R_X86_64_64:
  case R_X86_64_DTPOFF64:
  case R_X86_64_PC64:
  case R_X86_64_SIZE64:
  case R_X86_64_GOT64:
  case R_X86_64_GOTOFF64:
  case R_X86_64_GOTPC64:
    write64le(loc, val);
    break;
  default:
    llvm_unreachable("unknown relocation");
  }
}
788
adjustGotPcExpr(RelType type,int64_t addend,const uint8_t * loc) const789 RelExpr X86_64::adjustGotPcExpr(RelType type, int64_t addend,
790 const uint8_t *loc) const {
791 // Only R_X86_64_[REX_]GOTPCRELX can be relaxed. GNU as may emit GOTPCRELX
792 // with addend != -4. Such an instruction does not load the full GOT entry, so
793 // we cannot relax the relocation. E.g. movl x@GOTPCREL+4(%rip), %rax
794 // (addend=0) loads the high 32 bits of the GOT entry.
795 if ((type != R_X86_64_GOTPCRELX && type != R_X86_64_REX_GOTPCRELX) ||
796 addend != -4)
797 return R_GOT_PC;
798 const uint8_t op = loc[-2];
799 const uint8_t modRm = loc[-1];
800
801 // FIXME: When PIC is disabled and foo is defined locally in the
802 // lower 32 bit address space, memory operand in mov can be converted into
803 // immediate operand. Otherwise, mov must be changed to lea. We support only
804 // latter relaxation at this moment.
805 if (op == 0x8b)
806 return R_RELAX_GOT_PC;
807
808 // Relax call and jmp.
809 if (op == 0xff && (modRm == 0x15 || modRm == 0x25))
810 return R_RELAX_GOT_PC;
811
812 // We don't support test/binop instructions without a REX prefix.
813 if (type == R_X86_64_GOTPCRELX)
814 return R_GOT_PC;
815
816 // Relaxation of test, adc, add, and, cmp, or, sbb, sub, xor.
817 // If PIC then no relaxation is available.
818 return config->isPic ? R_GOT_PC : R_RELAX_GOT_PC_NOPIC;
819 }
820
// A subset of relaxations can only be applied for no-PIC. This method
// handles such relaxations. Instructions encoding information was taken from:
// "Intel 64 and IA-32 Architectures Software Developer's Manual V2"
// (http://www.intel.com/content/dam/www/public/us/en/documents/manuals/
// 64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf)
static void relaxGotNoPic(uint8_t *loc, uint64_t val, uint8_t op,
                          uint8_t modRm) {
  const uint8_t rex = loc[-3];
  // Convert "test %reg, foo@GOTPCREL(%rip)" to "test $foo, %reg".
  if (op == 0x85) {
    // See "TEST-Logical Compare" (4-428 Vol. 2B),
    // TEST r/m64, r64 uses "full" ModR / M byte (no opcode extension).

    // ModR/M byte has form XX YYY ZZZ, where
    // YYY is MODRM.reg(register 2), ZZZ is MODRM.rm(register 1).
    // XX has different meanings:
    // 00: The operand's memory address is in reg1.
    // 01: The operand's memory address is reg1 + a byte-sized displacement.
    // 10: The operand's memory address is reg1 + a word-sized displacement.
    // 11: The operand is reg1 itself.
    // If an instruction requires only one operand, the unused reg2 field
    // holds extra opcode bits rather than a register code
    // 0xC0 == 11 000 000 binary.
    // 0x38 == 00 111 000 binary.
    // We transfer reg2 to reg1 here as operand.
    // See "2.1.3 ModR/M and SIB Bytes" (Vol. 2A 2-3).
    loc[-1] = 0xc0 | (modRm & 0x38) >> 3; // ModR/M byte.

    // Change opcode from TEST r/m64, r64 to TEST r/m64, imm32
    // See "TEST-Logical Compare" (4-428 Vol. 2B).
    loc[-2] = 0xf7;

    // Move R bit to the B bit in REX byte.
    // REX byte is encoded as 0100WRXB, where
    // 0100 is 4bit fixed pattern.
    // REX.W When 1, a 64-bit operand size is used. Otherwise, when 0, the
    //   default operand size is used (which is 32-bit for most but not all
    //   instructions).
    // REX.R This 1-bit value is an extension to the MODRM.reg field.
    // REX.X This 1-bit value is an extension to the SIB.index field.
    // REX.B This 1-bit value is an extension to the MODRM.rm field or the
    // SIB.base field.
    // See "2.2.1.2 More on REX Prefix Fields " (2-8 Vol. 2A).
    loc[-3] = (rex & ~0x4) | (rex & 0x4) >> 2;
    write32le(loc, val);
    return;
  }

  // If we are here then we need to relax the adc, add, and, cmp, or, sbb, sub
  // or xor operations.

  // Convert "binop foo@GOTPCREL(%rip), %reg" to "binop $foo, %reg".
  // Logic is close to one for test instruction above, but we also
  // write opcode extension here, see below for details.
  loc[-1] = 0xc0 | (modRm & 0x38) >> 3 | (op & 0x3c); // ModR/M byte.

  // Primary opcode is 0x81, opcode extension is one of:
  // 000b = ADD, 001b is OR, 010b is ADC, 011b is SBB,
  // 100b is AND, 101b is SUB, 110b is XOR, 111b is CMP.
  // This value was written to MODRM.reg in a line above.
  // See "3.2 INSTRUCTIONS (A-M)" (Vol. 2A 3-15),
  // "INSTRUCTION SET REFERENCE, N-Z" (Vol. 2B 4-1) for
  // descriptions about each operation.
  loc[-2] = 0x81;
  // Fold REX.R into REX.B, as for the test case above.
  loc[-3] = (rex & ~0x4) | (rex & 0x4) >> 2;
  write32le(loc, val);
}
888
// Relax a GOTPCRELX-annotated instruction so it no longer goes through the
// GOT: mov becomes lea, indirect call/jmp become direct, and (no-PIC only)
// test/binops take an immediate.
void X86_64::relaxGot(uint8_t *loc, const Relocation &rel, uint64_t val) const {
  checkInt(loc, val, 32, rel);
  const uint8_t op = loc[-2];
  const uint8_t modRm = loc[-1];

  // Convert "mov foo@GOTPCREL(%rip),%reg" to "lea foo(%rip),%reg".
  if (op == 0x8b) {
    loc[-2] = 0x8d;
    write32le(loc, val);
    return;
  }

  if (op != 0xff) {
    // We are relaxing a rip relative to an absolute, so compensate
    // for the old -4 addend.
    assert(!config->isPic);
    relaxGotNoPic(loc, val + 4, op, modRm);
    return;
  }

  // Convert call/jmp instructions.
  if (modRm == 0x15) {
    // ABI says we can convert "call *foo@GOTPCREL(%rip)" to "nop; call foo".
    // Instead we convert to "addr32 call foo" where addr32 is an instruction
    // prefix. That makes result expression to be a single instruction.
    loc[-2] = 0x67; // addr32 prefix
    loc[-1] = 0xe8; // call
    write32le(loc, val);
    return;
  }

  // Convert "jmp *foo@GOTPCREL(%rip)" to "jmp foo; nop".
  // jmp doesn't return, so it is fine to use nop here, it is just a stub.
  assert(modRm == 0x25);
  loc[-2] = 0xe9; // jmp
  loc[3] = 0x90;  // nop
  // The displacement field moves back one byte (to loc - 1), so the value
  // is adjusted by +1 to keep the same target.
  write32le(loc - 1, val + 1);
}
927
// A split-stack prologue starts by checking the amount of stack remaining
// in one of two ways:
// A) Comparing of the stack pointer to a field in the tcb.
// B) Or a load of a stack pointer offset with an lea to r10 or r11.
// Returns true if the prologue at loc was recognized and patched.
bool X86_64::adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end,
                                              uint8_t stOther) const {
  // Only the LP64 ABI is handled; x32 (ILP32) is rejected.
  if (!config->is64) {
    error("Target doesn't support split stacks.");
    return false;
  }

  // Need at least 8 patchable bytes before the end of the section.
  if (loc + 8 >= end)
    return false;

  // Replace "cmp %fs:0x70,%rsp" and subsequent branch
  // with "stc, nopl 0x0(%rax,%rax,1)"
  if (memcmp(loc, "\x64\x48\x3b\x24\x25", 5) == 0) {
    memcpy(loc, "\xf9\x0f\x1f\x84\x00\x00\x00\x00", 8);
    return true;
  }

  // Adjust "lea X(%rsp),%rYY" to lea "(X - 0x4000)(%rsp),%rYY" where rYY could
  // be r10 or r11. The lea instruction feeds a subsequent compare which checks
  // if there is X available stack space. Making X larger effectively reserves
  // that much additional space. The stack grows downward so subtract the value.
  if (memcmp(loc, "\x4c\x8d\x94\x24", 4) == 0 ||
      memcmp(loc, "\x4c\x8d\x9c\x24", 4) == 0) {
    // The offset bytes are encoded four bytes after the start of the
    // instruction.
    write32le(loc + 4, read32le(loc + 4) - 0x4000);
    return true;
  }
  return false;
}
962
// If Intel Indirect Branch Tracking is enabled, we have to emit special PLT
// entries containing endbr64 instructions. A PLT entry will be split into two
// parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt).
namespace {
class IntelIBT : public X86_64 {
public:
  IntelIBT();
  // Points the .got.plt slot at the symbol's stub in the IBT .plt.
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  // Writes the endbr64-prefixed indirect jump in .plt.sec.
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
  // Writes the header and the lazy-binding stubs in .plt.
  void writeIBTPlt(uint8_t *buf, size_t numEntries) const override;

  // Byte size of the header at the start of the IBT .plt section.
  static const unsigned IBTPltHeaderSize = 16;
};
} // namespace
978
IntelIBT()979 IntelIBT::IntelIBT() { pltHeaderSize = 0; }
980
writeGotPlt(uint8_t * buf,const Symbol & s) const981 void IntelIBT::writeGotPlt(uint8_t *buf, const Symbol &s) const {
982 uint64_t va =
983 in.ibtPlt->getVA() + IBTPltHeaderSize + s.pltIndex * pltEntrySize;
984 write64le(buf, va);
985 }
986
writePlt(uint8_t * buf,const Symbol & sym,uint64_t pltEntryAddr) const987 void IntelIBT::writePlt(uint8_t *buf, const Symbol &sym,
988 uint64_t pltEntryAddr) const {
989 const uint8_t Inst[] = {
990 0xf3, 0x0f, 0x1e, 0xfa, // endbr64
991 0xff, 0x25, 0, 0, 0, 0, // jmpq *got(%rip)
992 0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
993 };
994 memcpy(buf, Inst, sizeof(Inst));
995 write32le(buf + 6, sym.getGotPltVA() - pltEntryAddr - 10);
996 }
997
writeIBTPlt(uint8_t * buf,size_t numEntries) const998 void IntelIBT::writeIBTPlt(uint8_t *buf, size_t numEntries) const {
999 writePltHeader(buf);
1000 buf += IBTPltHeaderSize;
1001
1002 const uint8_t inst[] = {
1003 0xf3, 0x0f, 0x1e, 0xfa, // endbr64
1004 0x68, 0, 0, 0, 0, // pushq <relocation index>
1005 0xe9, 0, 0, 0, 0, // jmpq plt[0]
1006 0x66, 0x90, // nop
1007 };
1008
1009 for (size_t i = 0; i < numEntries; ++i) {
1010 memcpy(buf, inst, sizeof(inst));
1011 write32le(buf + 5, i);
1012 write32le(buf + 10, -pltHeaderSize - sizeof(inst) * i - 30);
1013 buf += sizeof(inst);
1014 }
1015 }
1016
// These nonstandard PLT entries are to mitigate Spectre v2 security
1018 // vulnerability. In order to mitigate Spectre v2, we want to avoid indirect
1019 // branch instructions such as `jmp *GOTPLT(%rip)`. So, in the following PLT
1020 // entries, we use a CALL followed by MOV and RET to do the same thing as an
1021 // indirect jump. That instruction sequence is so-called "retpoline".
1022 //
1023 // We have two types of retpoline PLTs as a size optimization. If `-z now`
1024 // is specified, all dynamic symbols are resolved at load-time. Thus, when
1025 // that option is given, we can omit code for symbol lazy resolution.
namespace {
// Retpoline PLT with support for lazy symbol resolution.
class Retpoline : public X86_64 {
public:
  Retpoline();
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
};

// Smaller retpoline PLT for -z now: symbols are resolved at load time, so
// the lazy-resolution code is omitted.
class RetpolineZNow : public X86_64 {
public:
  RetpolineZNow();
  // Nothing to write: there is no lazy-resolution stub for the slot to
  // point at.
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override {}
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
};
} // namespace
1045
// Retpoline code sequences are larger than the regular ones: 48-byte header
// and 32-byte entries (see the byte tables in writePltHeader/writePlt).
Retpoline::Retpoline() {
  pltHeaderSize = 48;
  pltEntrySize = 32;
  ipltEntrySize = 32;
}
1051
writeGotPlt(uint8_t * buf,const Symbol & s) const1052 void Retpoline::writeGotPlt(uint8_t *buf, const Symbol &s) const {
1053 write64le(buf, s.getPltVA() + 17);
1054 }
1055
// Write the retpoline PLT header. It pushes GOTPLT+8 and loads GOTPLT+16
// into %r11, then performs the retpoline sequence: call over a
// speculation-trapping pause/lfence loop to code that overwrites the return
// address on the stack with %r11 and returns — an indirect jump without an
// indirect branch instruction.
void Retpoline::writePltHeader(uint8_t *buf) const {
  const uint8_t insn[] = {
      0xff, 0x35, 0,    0,    0,    0,          // 0:    pushq GOTPLT+8(%rip)
      0x4c, 0x8b, 0x1d, 0,    0,    0,    0,    // 6:    mov GOTPLT+16(%rip), %r11
      0xe8, 0x0e, 0x00, 0x00, 0x00,             // d:    callq next
      0xf3, 0x90,                               // 12: loop: pause
      0x0f, 0xae, 0xe8,                         // 14:   lfence
      0xeb, 0xf9,                               // 17:   jmp loop
      0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19:   int3; .align 16
      0x4c, 0x89, 0x1c, 0x24,                   // 20: next: mov %r11, (%rsp)
      0xc3,                                     // 24:   ret
      0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 25:   int3; padding
      0xcc, 0xcc, 0xcc, 0xcc,                   // 2c:   int3; padding
  };
  memcpy(buf, insn, sizeof(insn));

  uint64_t gotPlt = in.gotPlt->getVA();
  uint64_t plt = in.plt->getVA();
  // RIP-relative displacements: -6 and -13 are the ends of the pushq and mov
  // relative to the header start; +8 and +16 select the GOTPLT slots.
  write32le(buf + 2, gotPlt - plt - 6 + 8);
  write32le(buf + 9, gotPlt - plt - 13 + 16);
}
1077
// Write one 32-byte retpoline PLT entry. The fast path loads the resolved
// target from GOTPLT into %r11 and "calls" through the retpoline thunk at
// plt+0x20; the lazy path (entered via the .got.plt slot, see writeGotPlt)
// starts at offset 0x11 and pushes the relocation index before jumping to
// the header at plt+0.
void Retpoline::writePlt(uint8_t *buf, const Symbol &sym,
                         uint64_t pltEntryAddr) const {
  const uint8_t insn[] = {
      0x4c, 0x8b, 0x1d, 0, 0, 0, 0, // 0:  mov foo@GOTPLT(%rip), %r11
      0xe8, 0,    0,    0, 0,       // 7:  callq plt+0x20
      0xe9, 0,    0,    0, 0,       // c:  jmp plt+0x12
      0x68, 0,    0,    0, 0,       // 11: pushq <relocation index>
      0xe9, 0,    0,    0, 0,       // 16: jmp plt+0
      0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1b: int3; padding
  };
  memcpy(buf, insn, sizeof(insn));

  // Offset of this entry from the start of .plt.
  uint64_t off = pltEntryAddr - in.plt->getVA();

  // Each displacement is measured from the end of its instruction.
  write32le(buf + 3, sym.getGotPltVA() - pltEntryAddr - 7); // mov operand
  write32le(buf + 8, -off - 12 + 32);  // callq -> thunk at plt+0x20
  write32le(buf + 13, -off - 17 + 18); // jmp -> pause/lfence loop at plt+0x12
  write32le(buf + 18, sym.pltIndex);   // pushq operand
  write32le(buf + 23, -off - 27);      // jmp -> header at plt+0
}
1098
// Without the lazy-resolution code, the header shrinks to 32 bytes and each
// entry to 16 (see the byte tables in writePltHeader/writePlt).
RetpolineZNow::RetpolineZNow() {
  pltHeaderSize = 32;
  pltEntrySize = 16;
  ipltEntrySize = 16;
}
1104
// Write the -z now retpoline header: only the shared thunk (a call over a
// pause/lfence speculation trap to "mov %r11,(%rsp); ret") that entries jump
// to. No GOTPLT pushes are needed since nothing is resolved lazily.
void RetpolineZNow::writePltHeader(uint8_t *buf) const {
  const uint8_t insn[] = {
      0xe8, 0x0b, 0x00, 0x00, 0x00, // 0:    call next
      0xf3, 0x90,                   // 5:  loop: pause
      0x0f, 0xae, 0xe8,             // 7:    lfence
      0xeb, 0xf9,                   // a:    jmp loop
      0xcc, 0xcc, 0xcc, 0xcc,       // c:    int3; .align 16
      0x4c, 0x89, 0x1c, 0x24,       // 10: next: mov %r11, (%rsp)
      0xc3,                         // 14:   ret
      0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 15:   int3; padding
      0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1a:   int3; padding
      0xcc,                         // 1f:   int3; padding
  };
  memcpy(buf, insn, sizeof(insn));
}
1120
writePlt(uint8_t * buf,const Symbol & sym,uint64_t pltEntryAddr) const1121 void RetpolineZNow::writePlt(uint8_t *buf, const Symbol &sym,
1122 uint64_t pltEntryAddr) const {
1123 const uint8_t insn[] = {
1124 0x4c, 0x8b, 0x1d, 0, 0, 0, 0, // mov foo@GOTPLT(%rip), %r11
1125 0xe9, 0, 0, 0, 0, // jmp plt+0
1126 0xcc, 0xcc, 0xcc, 0xcc, // int3; padding
1127 };
1128 memcpy(buf, insn, sizeof(insn));
1129
1130 write32le(buf + 3, sym.getGotPltVA() - pltEntryAddr - 7);
1131 write32le(buf + 8, in.plt->getVA() - pltEntryAddr - 12);
1132 }
1133
getTargetInfo()1134 static TargetInfo *getTargetInfo() {
1135 if (config->zRetpolineplt) {
1136 if (config->zNow) {
1137 static RetpolineZNow t;
1138 return &t;
1139 }
1140 static Retpoline t;
1141 return &t;
1142 }
1143
1144 if (config->andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT) {
1145 static IntelIBT t;
1146 return &t;
1147 }
1148
1149 static X86_64 t;
1150 return &t;
1151 }
1152
getX86_64TargetInfo()1153 TargetInfo *elf::getX86_64TargetInfo() { return getTargetInfo(); }
1154