1 //===- X86.cpp ------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "InputFiles.h"
10 #include "Symbols.h"
11 #include "SyntheticSections.h"
12 #include "Target.h"
13 #include "lld/Common/ErrorHandler.h"
14 #include "llvm/Support/Endian.h"
15 
16 using namespace llvm;
17 using namespace llvm::support::endian;
18 using namespace llvm::ELF;
19 using namespace lld;
20 using namespace lld::elf;
21 
22 namespace {
23 class X86 : public TargetInfo {
24 public:
25   X86();
26   int getTlsGdRelaxSkip(RelType type) const override;
27   RelExpr getRelExpr(RelType type, const Symbol &s,
28                      const uint8_t *loc) const override;
29   int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
30   void writeGotPltHeader(uint8_t *buf) const override;
31   RelType getDynRel(RelType type) const override;
32   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
33   void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
34   void writePltHeader(uint8_t *buf) const override;
35   void writePlt(uint8_t *buf, const Symbol &sym,
36                 uint64_t pltEntryAddr) const override;
37   void relocate(uint8_t *loc, const Relocation &rel,
38                 uint64_t val) const override;
39 
40   RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
41   void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
42                       uint64_t val) const override;
43   void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
44                       uint64_t val) const override;
45   void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
46                       uint64_t val) const override;
47   void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel,
48                       uint64_t val) const override;
49 };
50 } // namespace
51 
X86()52 X86::X86() {
53   copyRel = R_386_COPY;
54   gotRel = R_386_GLOB_DAT;
55   noneRel = R_386_NONE;
56   pltRel = R_386_JUMP_SLOT;
57   iRelativeRel = R_386_IRELATIVE;
58   relativeRel = R_386_RELATIVE;
59   symbolicRel = R_386_32;
60   tlsGotRel = R_386_TLS_TPOFF;
61   tlsModuleIndexRel = R_386_TLS_DTPMOD32;
62   tlsOffsetRel = R_386_TLS_DTPOFF32;
63   pltHeaderSize = 16;
64   pltEntrySize = 16;
65   ipltEntrySize = 16;
66   trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3
67 
68   // Align to the non-PAE large page size (known as a superpage or huge page).
69   // FreeBSD automatically promotes large, superpage-aligned allocations.
70   defaultImageBase = 0x400000;
71 }
72 
getTlsGdRelaxSkip(RelType type) const73 int X86::getTlsGdRelaxSkip(RelType type) const {
74   return 2;
75 }
76 
getRelExpr(RelType type,const Symbol & s,const uint8_t * loc) const77 RelExpr X86::getRelExpr(RelType type, const Symbol &s,
78                         const uint8_t *loc) const {
79   // There are 4 different TLS variable models with varying degrees of
80   // flexibility and performance. LocalExec and InitialExec models are fast but
81   // less-flexible models. If they are in use, we set DF_STATIC_TLS flag in the
82   // dynamic section to let runtime know about that.
83   if (type == R_386_TLS_LE || type == R_386_TLS_LE_32 || type == R_386_TLS_IE ||
84       type == R_386_TLS_GOTIE)
85     config->hasStaticTlsModel = true;
86 
87   switch (type) {
88   case R_386_8:
89   case R_386_16:
90   case R_386_32:
91     return R_ABS;
92   case R_386_TLS_LDO_32:
93     return R_DTPREL;
94   case R_386_TLS_GD:
95     return R_TLSGD_GOTPLT;
96   case R_386_TLS_LDM:
97     return R_TLSLD_GOTPLT;
98   case R_386_PLT32:
99     return R_PLT_PC;
100   case R_386_PC8:
101   case R_386_PC16:
102   case R_386_PC32:
103     return R_PC;
104   case R_386_GOTPC:
105     return R_GOTPLTONLY_PC;
106   case R_386_TLS_IE:
107     return R_GOT;
108   case R_386_GOT32:
109   case R_386_GOT32X:
110     // These relocations are arguably mis-designed because their calculations
111     // depend on the instructions they are applied to. This is bad because we
112     // usually don't care about whether the target section contains valid
113     // machine instructions or not. But this is part of the documented ABI, so
114     // we had to implement as the standard requires.
115     //
116     // x86 does not support PC-relative data access. Therefore, in order to
117     // access GOT contents, a GOT address needs to be known at link-time
118     // (which means non-PIC) or compilers have to emit code to get a GOT
119     // address at runtime (which means code is position-independent but
120     // compilers need to emit extra code for each GOT access.) This decision
121     // is made at compile-time. In the latter case, compilers emit code to
122     // load a GOT address to a register, which is usually %ebx.
123     //
124     // So, there are two ways to refer to symbol foo's GOT entry: foo@GOT or
125     // foo@GOT(%ebx).
126     //
127     // foo@GOT is not usable in PIC. If we are creating a PIC output and if we
128     // find such relocation, we should report an error. foo@GOT is resolved to
129     // an *absolute* address of foo's GOT entry, because both GOT address and
130     // foo's offset are known. In other words, it's G + A.
131     //
132     // foo@GOT(%ebx) needs to be resolved to a *relative* offset from a GOT to
133     // foo's GOT entry in the table, because GOT address is not known but foo's
134     // offset in the table is known. It's G + A - GOT.
135     //
136     // It's unfortunate that compilers emit the same relocation for these
137     // different use cases. In order to distinguish them, we have to read a
138     // machine instruction.
139     //
140     // The following code implements it. We assume that Loc[0] is the first byte
141     // of a displacement or an immediate field of a valid machine
142     // instruction. That means a ModRM byte is at Loc[-1]. By taking a look at
143     // the byte, we can determine whether the instruction uses the operand as an
144     // absolute address (R_GOT) or a register-relative address (R_GOTPLT).
145     return (loc[-1] & 0xc7) == 0x5 ? R_GOT : R_GOTPLT;
146   case R_386_TLS_GOTIE:
147     return R_GOTPLT;
148   case R_386_GOTOFF:
149     return R_GOTPLTREL;
150   case R_386_TLS_LE:
151     return R_TPREL;
152   case R_386_TLS_LE_32:
153     return R_TPREL_NEG;
154   case R_386_NONE:
155     return R_NONE;
156   default:
157     error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
158           ") against symbol " + toString(s));
159     return R_NONE;
160   }
161 }
162 
adjustTlsExpr(RelType type,RelExpr expr) const163 RelExpr X86::adjustTlsExpr(RelType type, RelExpr expr) const {
164   switch (expr) {
165   default:
166     return expr;
167   case R_RELAX_TLS_GD_TO_IE:
168     return R_RELAX_TLS_GD_TO_IE_GOTPLT;
169   case R_RELAX_TLS_GD_TO_LE:
170     return R_RELAX_TLS_GD_TO_LE_NEG;
171   }
172 }
173 
writeGotPltHeader(uint8_t * buf) const174 void X86::writeGotPltHeader(uint8_t *buf) const {
175   write32le(buf, mainPart->dynamic->getVA());
176 }
177 
writeGotPlt(uint8_t * buf,const Symbol & s) const178 void X86::writeGotPlt(uint8_t *buf, const Symbol &s) const {
179   // Entries in .got.plt initially points back to the corresponding
180   // PLT entries with a fixed offset to skip the first instruction.
181   write32le(buf, s.getPltVA() + 6);
182 }
183 
writeIgotPlt(uint8_t * buf,const Symbol & s) const184 void X86::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
185   // An x86 entry is the address of the ifunc resolver function.
186   write32le(buf, s.getVA());
187 }
188 
getDynRel(RelType type) const189 RelType X86::getDynRel(RelType type) const {
190   if (type == R_386_TLS_LE)
191     return R_386_TLS_TPOFF;
192   if (type == R_386_TLS_LE_32)
193     return R_386_TLS_TPOFF32;
194   return type;
195 }
196 
writePltHeader(uint8_t * buf) const197 void X86::writePltHeader(uint8_t *buf) const {
198   if (config->isPic) {
199     const uint8_t v[] = {
200         0xff, 0xb3, 0x04, 0x00, 0x00, 0x00, // pushl 4(%ebx)
201         0xff, 0xa3, 0x08, 0x00, 0x00, 0x00, // jmp *8(%ebx)
202         0x90, 0x90, 0x90, 0x90              // nop
203     };
204     memcpy(buf, v, sizeof(v));
205     return;
206   }
207 
208   const uint8_t pltData[] = {
209       0xff, 0x35, 0, 0, 0, 0, // pushl (GOTPLT+4)
210       0xff, 0x25, 0, 0, 0, 0, // jmp *(GOTPLT+8)
211       0x90, 0x90, 0x90, 0x90, // nop
212   };
213   memcpy(buf, pltData, sizeof(pltData));
214   uint32_t gotPlt = in.gotPlt->getVA();
215   write32le(buf + 2, gotPlt + 4);
216   write32le(buf + 8, gotPlt + 8);
217 }
218 
writePlt(uint8_t * buf,const Symbol & sym,uint64_t pltEntryAddr) const219 void X86::writePlt(uint8_t *buf, const Symbol &sym,
220                    uint64_t pltEntryAddr) const {
221   unsigned relOff = in.relaPlt->entsize * sym.pltIndex;
222   if (config->isPic) {
223     const uint8_t inst[] = {
224         0xff, 0xa3, 0, 0, 0, 0, // jmp *foo@GOT(%ebx)
225         0x68, 0,    0, 0, 0,    // pushl $reloc_offset
226         0xe9, 0,    0, 0, 0,    // jmp .PLT0@PC
227     };
228     memcpy(buf, inst, sizeof(inst));
229     write32le(buf + 2, sym.getGotPltVA() - in.gotPlt->getVA());
230   } else {
231     const uint8_t inst[] = {
232         0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT
233         0x68, 0,    0, 0, 0,    // pushl $reloc_offset
234         0xe9, 0,    0, 0, 0,    // jmp .PLT0@PC
235     };
236     memcpy(buf, inst, sizeof(inst));
237     write32le(buf + 2, sym.getGotPltVA());
238   }
239 
240   write32le(buf + 7, relOff);
241   write32le(buf + 12, in.plt->getVA() - pltEntryAddr - 16);
242 }
243 
getImplicitAddend(const uint8_t * buf,RelType type) const244 int64_t X86::getImplicitAddend(const uint8_t *buf, RelType type) const {
245   switch (type) {
246   case R_386_8:
247   case R_386_PC8:
248     return SignExtend64<8>(*buf);
249   case R_386_16:
250   case R_386_PC16:
251     return SignExtend64<16>(read16le(buf));
252   case R_386_32:
253   case R_386_GLOB_DAT:
254   case R_386_GOT32:
255   case R_386_GOT32X:
256   case R_386_GOTOFF:
257   case R_386_GOTPC:
258   case R_386_IRELATIVE:
259   case R_386_PC32:
260   case R_386_PLT32:
261   case R_386_RELATIVE:
262   case R_386_TLS_DTPMOD32:
263   case R_386_TLS_DTPOFF32:
264   case R_386_TLS_LDO_32:
265   case R_386_TLS_LDM:
266   case R_386_TLS_IE:
267   case R_386_TLS_IE_32:
268   case R_386_TLS_LE:
269   case R_386_TLS_LE_32:
270   case R_386_TLS_GD:
271   case R_386_TLS_GD_32:
272   case R_386_TLS_GOTIE:
273   case R_386_TLS_TPOFF:
274   case R_386_TLS_TPOFF32:
275     return SignExtend64<32>(read32le(buf));
276   case R_386_NONE:
277   case R_386_JUMP_SLOT:
278     // These relocations are defined as not having an implicit addend.
279     return 0;
280   default:
281     internalLinkerError(getErrorLocation(buf),
282                         "cannot read addend for relocation " + toString(type));
283     return 0;
284   }
285 }
286 
relocate(uint8_t * loc,const Relocation & rel,uint64_t val) const287 void X86::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
288   switch (rel.type) {
289   case R_386_8:
290     // R_386_{PC,}{8,16} are not part of the i386 psABI, but they are
291     // being used for some 16-bit programs such as boot loaders, so
292     // we want to support them.
293     checkIntUInt(loc, val, 8, rel);
294     *loc = val;
295     break;
296   case R_386_PC8:
297     checkInt(loc, val, 8, rel);
298     *loc = val;
299     break;
300   case R_386_16:
301     checkIntUInt(loc, val, 16, rel);
302     write16le(loc, val);
303     break;
304   case R_386_PC16:
305     // R_386_PC16 is normally used with 16 bit code. In that situation
306     // the PC is 16 bits, just like the addend. This means that it can
307     // point from any 16 bit address to any other if the possibility
308     // of wrapping is included.
309     // The only restriction we have to check then is that the destination
310     // address fits in 16 bits. That is impossible to do here. The problem is
311     // that we are passed the final value, which already had the
312     // current location subtracted from it.
313     // We just check that Val fits in 17 bits. This misses some cases, but
314     // should have no false positives.
315     checkInt(loc, val, 17, rel);
316     write16le(loc, val);
317     break;
318   case R_386_32:
319   case R_386_GOT32:
320   case R_386_GOT32X:
321   case R_386_GOTOFF:
322   case R_386_GOTPC:
323   case R_386_PC32:
324   case R_386_PLT32:
325   case R_386_RELATIVE:
326   case R_386_TLS_DTPMOD32:
327   case R_386_TLS_DTPOFF32:
328   case R_386_TLS_GD:
329   case R_386_TLS_GOTIE:
330   case R_386_TLS_IE:
331   case R_386_TLS_LDM:
332   case R_386_TLS_LDO_32:
333   case R_386_TLS_LE:
334   case R_386_TLS_LE_32:
335   case R_386_TLS_TPOFF:
336   case R_386_TLS_TPOFF32:
337     checkInt(loc, val, 32, rel);
338     write32le(loc, val);
339     break;
340   default:
341     llvm_unreachable("unknown relocation");
342   }
343 }
344 
relaxTlsGdToLe(uint8_t * loc,const Relocation &,uint64_t val) const345 void X86::relaxTlsGdToLe(uint8_t *loc, const Relocation &, uint64_t val) const {
346   // Convert
347   //   leal x@tlsgd(, %ebx, 1),
348   //   call __tls_get_addr@plt
349   // to
350   //   movl %gs:0,%eax
351   //   subl $x@ntpoff,%eax
352   const uint8_t inst[] = {
353       0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
354       0x81, 0xe8, 0, 0, 0, 0,             // subl Val(%ebx), %eax
355   };
356   memcpy(loc - 3, inst, sizeof(inst));
357   write32le(loc + 5, val);
358 }
359 
relaxTlsGdToIe(uint8_t * loc,const Relocation &,uint64_t val) const360 void X86::relaxTlsGdToIe(uint8_t *loc, const Relocation &, uint64_t val) const {
361   // Convert
362   //   leal x@tlsgd(, %ebx, 1),
363   //   call __tls_get_addr@plt
364   // to
365   //   movl %gs:0, %eax
366   //   addl x@gotntpoff(%ebx), %eax
367   const uint8_t inst[] = {
368       0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
369       0x03, 0x83, 0, 0, 0, 0,             // addl Val(%ebx), %eax
370   };
371   memcpy(loc - 3, inst, sizeof(inst));
372   write32le(loc + 5, val);
373 }
374 
375 // In some conditions, relocations can be optimized to avoid using GOT.
376 // This function does that for Initial Exec to Local Exec case.
relaxTlsIeToLe(uint8_t * loc,const Relocation & rel,uint64_t val) const377 void X86::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
378                          uint64_t val) const {
379   // Ulrich's document section 6.2 says that @gotntpoff can
380   // be used with MOVL or ADDL instructions.
381   // @indntpoff is similar to @gotntpoff, but for use in
382   // position dependent code.
383   uint8_t reg = (loc[-1] >> 3) & 7;
384 
385   if (rel.type == R_386_TLS_IE) {
386     if (loc[-1] == 0xa1) {
387       // "movl foo@indntpoff,%eax" -> "movl $foo,%eax"
388       // This case is different from the generic case below because
389       // this is a 5 byte instruction while below is 6 bytes.
390       loc[-1] = 0xb8;
391     } else if (loc[-2] == 0x8b) {
392       // "movl foo@indntpoff,%reg" -> "movl $foo,%reg"
393       loc[-2] = 0xc7;
394       loc[-1] = 0xc0 | reg;
395     } else {
396       // "addl foo@indntpoff,%reg" -> "addl $foo,%reg"
397       loc[-2] = 0x81;
398       loc[-1] = 0xc0 | reg;
399     }
400   } else {
401     assert(rel.type == R_386_TLS_GOTIE);
402     if (loc[-2] == 0x8b) {
403       // "movl foo@gottpoff(%rip),%reg" -> "movl $foo,%reg"
404       loc[-2] = 0xc7;
405       loc[-1] = 0xc0 | reg;
406     } else {
407       // "addl foo@gotntpoff(%rip),%reg" -> "leal foo(%reg),%reg"
408       loc[-2] = 0x8d;
409       loc[-1] = 0x80 | (reg << 3) | reg;
410     }
411   }
412   write32le(loc, val);
413 }
414 
relaxTlsLdToLe(uint8_t * loc,const Relocation & rel,uint64_t val) const415 void X86::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel,
416                          uint64_t val) const {
417   if (rel.type == R_386_TLS_LDO_32) {
418     write32le(loc, val);
419     return;
420   }
421 
422   // Convert
423   //   leal foo(%reg),%eax
424   //   call ___tls_get_addr
425   // to
426   //   movl %gs:0,%eax
427   //   nop
428   //   leal 0(%esi,1),%esi
429   const uint8_t inst[] = {
430       0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
431       0x90,                               // nop
432       0x8d, 0x74, 0x26, 0x00,             // leal 0(%esi,1),%esi
433   };
434   memcpy(loc - 2, inst, sizeof(inst));
435 }
436 
437 // If Intel Indirect Branch Tracking is enabled, we have to emit special PLT
438 // entries containing endbr32 instructions. A PLT entry will be split into two
439 // parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt).
440 namespace {
441 class IntelIBT : public X86 {
442 public:
443   IntelIBT();
444   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
445   void writePlt(uint8_t *buf, const Symbol &sym,
446                 uint64_t pltEntryAddr) const override;
447   void writeIBTPlt(uint8_t *buf, size_t numEntries) const override;
448 
449   static const unsigned IBTPltHeaderSize = 16;
450 };
451 } // namespace
452 
IntelIBT()453 IntelIBT::IntelIBT() { pltHeaderSize = 0; }
454 
writeGotPlt(uint8_t * buf,const Symbol & s) const455 void IntelIBT::writeGotPlt(uint8_t *buf, const Symbol &s) const {
456   uint64_t va =
457       in.ibtPlt->getVA() + IBTPltHeaderSize + s.pltIndex * pltEntrySize;
458   write32le(buf, va);
459 }
460 
writePlt(uint8_t * buf,const Symbol & sym,uint64_t) const461 void IntelIBT::writePlt(uint8_t *buf, const Symbol &sym,
462                         uint64_t /*pltEntryAddr*/) const {
463   if (config->isPic) {
464     const uint8_t inst[] = {
465         0xf3, 0x0f, 0x1e, 0xfb,       // endbr32
466         0xff, 0xa3, 0,    0,    0, 0, // jmp *name@GOT(%ebx)
467         0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
468     };
469     memcpy(buf, inst, sizeof(inst));
470     write32le(buf + 6, sym.getGotPltVA() - in.gotPlt->getVA());
471     return;
472   }
473 
474   const uint8_t inst[] = {
475       0xf3, 0x0f, 0x1e, 0xfb,       // endbr32
476       0xff, 0x25, 0,    0,    0, 0, // jmp *foo@GOT
477       0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
478   };
479   memcpy(buf, inst, sizeof(inst));
480   write32le(buf + 6, sym.getGotPltVA());
481 }
482 
writeIBTPlt(uint8_t * buf,size_t numEntries) const483 void IntelIBT::writeIBTPlt(uint8_t *buf, size_t numEntries) const {
484   writePltHeader(buf);
485   buf += IBTPltHeaderSize;
486 
487   const uint8_t inst[] = {
488       0xf3, 0x0f, 0x1e, 0xfb,    // endbr32
489       0x68, 0,    0,    0,    0, // pushl $reloc_offset
490       0xe9, 0,    0,    0,    0, // jmpq .PLT0@PC
491       0x66, 0x90,                // nop
492   };
493 
494   for (size_t i = 0; i < numEntries; ++i) {
495     memcpy(buf, inst, sizeof(inst));
496     write32le(buf + 5, i * sizeof(object::ELF32LE::Rel));
497     write32le(buf + 10, -pltHeaderSize - sizeof(inst) * i - 30);
498     buf += sizeof(inst);
499   }
500 }
501 
502 namespace {
503 class RetpolinePic : public X86 {
504 public:
505   RetpolinePic();
506   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
507   void writePltHeader(uint8_t *buf) const override;
508   void writePlt(uint8_t *buf, const Symbol &sym,
509                 uint64_t pltEntryAddr) const override;
510 };
511 
512 class RetpolineNoPic : public X86 {
513 public:
514   RetpolineNoPic();
515   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
516   void writePltHeader(uint8_t *buf) const override;
517   void writePlt(uint8_t *buf, const Symbol &sym,
518                 uint64_t pltEntryAddr) const override;
519 };
520 } // namespace
521 
RetpolinePic()522 RetpolinePic::RetpolinePic() {
523   pltHeaderSize = 48;
524   pltEntrySize = 32;
525   ipltEntrySize = 32;
526 }
527 
writeGotPlt(uint8_t * buf,const Symbol & s) const528 void RetpolinePic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
529   write32le(buf, s.getPltVA() + 17);
530 }
531 
writePltHeader(uint8_t * buf) const532 void RetpolinePic::writePltHeader(uint8_t *buf) const {
533   const uint8_t insn[] = {
534       0xff, 0xb3, 4,    0,    0,    0,          // 0:    pushl 4(%ebx)
535       0x50,                                     // 6:    pushl %eax
536       0x8b, 0x83, 8,    0,    0,    0,          // 7:    mov 8(%ebx), %eax
537       0xe8, 0x0e, 0x00, 0x00, 0x00,             // d:    call next
538       0xf3, 0x90,                               // 12: loop: pause
539       0x0f, 0xae, 0xe8,                         // 14:   lfence
540       0xeb, 0xf9,                               // 17:   jmp loop
541       0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19:   int3; .align 16
542       0x89, 0x0c, 0x24,                         // 20: next: mov %ecx, (%esp)
543       0x8b, 0x4c, 0x24, 0x04,                   // 23:   mov 0x4(%esp), %ecx
544       0x89, 0x44, 0x24, 0x04,                   // 27:   mov %eax ,0x4(%esp)
545       0x89, 0xc8,                               // 2b:   mov %ecx, %eax
546       0x59,                                     // 2d:   pop %ecx
547       0xc3,                                     // 2e:   ret
548       0xcc,                                     // 2f:   int3; padding
549   };
550   memcpy(buf, insn, sizeof(insn));
551 }
552 
writePlt(uint8_t * buf,const Symbol & sym,uint64_t pltEntryAddr) const553 void RetpolinePic::writePlt(uint8_t *buf, const Symbol &sym,
554                             uint64_t pltEntryAddr) const {
555   unsigned relOff = in.relaPlt->entsize * sym.pltIndex;
556   const uint8_t insn[] = {
557       0x50,                            // pushl %eax
558       0x8b, 0x83, 0,    0,    0,    0, // mov foo@GOT(%ebx), %eax
559       0xe8, 0,    0,    0,    0,       // call plt+0x20
560       0xe9, 0,    0,    0,    0,       // jmp plt+0x12
561       0x68, 0,    0,    0,    0,       // pushl $reloc_offset
562       0xe9, 0,    0,    0,    0,       // jmp plt+0
563       0xcc, 0xcc, 0xcc, 0xcc, 0xcc,    // int3; padding
564   };
565   memcpy(buf, insn, sizeof(insn));
566 
567   uint32_t ebx = in.gotPlt->getVA();
568   unsigned off = pltEntryAddr - in.plt->getVA();
569   write32le(buf + 3, sym.getGotPltVA() - ebx);
570   write32le(buf + 8, -off - 12 + 32);
571   write32le(buf + 13, -off - 17 + 18);
572   write32le(buf + 18, relOff);
573   write32le(buf + 23, -off - 27);
574 }
575 
RetpolineNoPic()576 RetpolineNoPic::RetpolineNoPic() {
577   pltHeaderSize = 48;
578   pltEntrySize = 32;
579   ipltEntrySize = 32;
580 }
581 
writeGotPlt(uint8_t * buf,const Symbol & s) const582 void RetpolineNoPic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
583   write32le(buf, s.getPltVA() + 16);
584 }
585 
writePltHeader(uint8_t * buf) const586 void RetpolineNoPic::writePltHeader(uint8_t *buf) const {
587   const uint8_t insn[] = {
588       0xff, 0x35, 0,    0,    0,    0, // 0:    pushl GOTPLT+4
589       0x50,                            // 6:    pushl %eax
590       0xa1, 0,    0,    0,    0,       // 7:    mov GOTPLT+8, %eax
591       0xe8, 0x0f, 0x00, 0x00, 0x00,    // c:    call next
592       0xf3, 0x90,                      // 11: loop: pause
593       0x0f, 0xae, 0xe8,                // 13:   lfence
594       0xeb, 0xf9,                      // 16:   jmp loop
595       0xcc, 0xcc, 0xcc, 0xcc, 0xcc,    // 18:   int3
596       0xcc, 0xcc, 0xcc,                // 1f:   int3; .align 16
597       0x89, 0x0c, 0x24,                // 20: next: mov %ecx, (%esp)
598       0x8b, 0x4c, 0x24, 0x04,          // 23:   mov 0x4(%esp), %ecx
599       0x89, 0x44, 0x24, 0x04,          // 27:   mov %eax ,0x4(%esp)
600       0x89, 0xc8,                      // 2b:   mov %ecx, %eax
601       0x59,                            // 2d:   pop %ecx
602       0xc3,                            // 2e:   ret
603       0xcc,                            // 2f:   int3; padding
604   };
605   memcpy(buf, insn, sizeof(insn));
606 
607   uint32_t gotPlt = in.gotPlt->getVA();
608   write32le(buf + 2, gotPlt + 4);
609   write32le(buf + 8, gotPlt + 8);
610 }
611 
writePlt(uint8_t * buf,const Symbol & sym,uint64_t pltEntryAddr) const612 void RetpolineNoPic::writePlt(uint8_t *buf, const Symbol &sym,
613                               uint64_t pltEntryAddr) const {
614   unsigned relOff = in.relaPlt->entsize * sym.pltIndex;
615   const uint8_t insn[] = {
616       0x50,                         // 0:  pushl %eax
617       0xa1, 0,    0,    0,    0,    // 1:  mov foo_in_GOT, %eax
618       0xe8, 0,    0,    0,    0,    // 6:  call plt+0x20
619       0xe9, 0,    0,    0,    0,    // b:  jmp plt+0x11
620       0x68, 0,    0,    0,    0,    // 10: pushl $reloc_offset
621       0xe9, 0,    0,    0,    0,    // 15: jmp plt+0
622       0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1a: int3; padding
623       0xcc,                         // 1f: int3; padding
624   };
625   memcpy(buf, insn, sizeof(insn));
626 
627   unsigned off = pltEntryAddr - in.plt->getVA();
628   write32le(buf + 2, sym.getGotPltVA());
629   write32le(buf + 7, -off - 11 + 32);
630   write32le(buf + 12, -off - 16 + 17);
631   write32le(buf + 17, relOff);
632   write32le(buf + 22, -off - 26);
633 }
634 
getX86TargetInfo()635 TargetInfo *elf::getX86TargetInfo() {
636   if (config->zRetpolineplt) {
637     if (config->isPic) {
638       static RetpolinePic t;
639       return &t;
640     }
641     static RetpolineNoPic t;
642     return &t;
643   }
644 
645   if (config->andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT) {
646     static IntelIBT t;
647     return &t;
648   }
649 
650   static X86 t;
651   return &t;
652 }
653