1 //===- X86.cpp ------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "InputFiles.h"
10 #include "Symbols.h"
11 #include "SyntheticSections.h"
12 #include "Target.h"
13 #include "lld/Common/ErrorHandler.h"
14 #include "llvm/Support/Endian.h"
15 
16 using namespace llvm;
17 using namespace llvm::support::endian;
18 using namespace llvm::ELF;
19 using namespace lld;
20 using namespace lld::elf;
21 
22 namespace {
23 class X86 : public TargetInfo {
24 public:
25   X86();
26   int getTlsGdRelaxSkip(RelType type) const override;
27   RelExpr getRelExpr(RelType type, const Symbol &s,
28                      const uint8_t *loc) const override;
29   int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
30   void writeGotPltHeader(uint8_t *buf) const override;
31   RelType getDynRel(RelType type) const override;
32   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
33   void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
34   void writePltHeader(uint8_t *buf) const override;
35   void writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, uint64_t pltEntryAddr,
36                 int32_t index, unsigned relOff) const override;
37   void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override;
38 
39   RelExpr adjustRelaxExpr(RelType type, const uint8_t *data,
40                           RelExpr expr) const override;
41   void relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const override;
42   void relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const override;
43   void relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const override;
44   void relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const override;
45 };
46 } // namespace
47 
48 X86::X86() {
49   copyRel = R_386_COPY;
50   gotRel = R_386_GLOB_DAT;
51   noneRel = R_386_NONE;
52   pltRel = R_386_JUMP_SLOT;
53   iRelativeRel = R_386_IRELATIVE;
54   relativeRel = R_386_RELATIVE;
55   symbolicRel = R_386_32;
56   tlsGotRel = R_386_TLS_TPOFF;
57   tlsModuleIndexRel = R_386_TLS_DTPMOD32;
58   tlsOffsetRel = R_386_TLS_DTPOFF32;
59   pltEntrySize = 16;
60   pltHeaderSize = 16;
61   trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3
62 
63   // Align to the non-PAE large page size (known as a superpage or huge page).
64   // FreeBSD automatically promotes large, superpage-aligned allocations.
65   defaultImageBase = 0x400000;
66 }
67 
68 int X86::getTlsGdRelaxSkip(RelType type) const {
69   return 2;
70 }
71 
72 RelExpr X86::getRelExpr(RelType type, const Symbol &s,
73                         const uint8_t *loc) const {
74   // There are 4 different TLS variable models with varying degrees of
75   // flexibility and performance. LocalExec and InitialExec models are fast but
76   // less-flexible models. If they are in use, we set DF_STATIC_TLS flag in the
77   // dynamic section to let runtime know about that.
78   if (type == R_386_TLS_LE || type == R_386_TLS_LE_32 || type == R_386_TLS_IE ||
79       type == R_386_TLS_GOTIE)
80     config->hasStaticTlsModel = true;
81 
82   switch (type) {
83   case R_386_8:
84   case R_386_16:
85   case R_386_32:
86     return R_ABS;
87   case R_386_TLS_LDO_32:
88     return R_DTPREL;
89   case R_386_TLS_GD:
90     return R_TLSGD_GOTPLT;
91   case R_386_TLS_LDM:
92     return R_TLSLD_GOTPLT;
93   case R_386_PLT32:
94     return R_PLT_PC;
95   case R_386_PC8:
96   case R_386_PC16:
97   case R_386_PC32:
98     return R_PC;
99   case R_386_GOTPC:
100     return R_GOTPLTONLY_PC;
101   case R_386_TLS_IE:
102     return R_GOT;
103   case R_386_GOT32:
104   case R_386_GOT32X:
105     // These relocations are arguably mis-designed because their calculations
106     // depend on the instructions they are applied to. This is bad because we
107     // usually don't care about whether the target section contains valid
108     // machine instructions or not. But this is part of the documented ABI, so
109     // we had to implement as the standard requires.
110     //
111     // x86 does not support PC-relative data access. Therefore, in order to
112     // access GOT contents, a GOT address needs to be known at link-time
113     // (which means non-PIC) or compilers have to emit code to get a GOT
114     // address at runtime (which means code is position-independent but
115     // compilers need to emit extra code for each GOT access.) This decision
116     // is made at compile-time. In the latter case, compilers emit code to
117     // load an GOT address to a register, which is usually %ebx.
118     //
119     // So, there are two ways to refer to symbol foo's GOT entry: foo@GOT or
120     // foo@GOT(%ebx).
121     //
122     // foo@GOT is not usable in PIC. If we are creating a PIC output and if we
123     // find such relocation, we should report an error. foo@GOT is resolved to
124     // an *absolute* address of foo's GOT entry, because both GOT address and
125     // foo's offset are known. In other words, it's G + A.
126     //
127     // foo@GOT(%ebx) needs to be resolved to a *relative* offset from a GOT to
128     // foo's GOT entry in the table, because GOT address is not known but foo's
129     // offset in the table is known. It's G + A - GOT.
130     //
131     // It's unfortunate that compilers emit the same relocation for these
132     // different use cases. In order to distinguish them, we have to read a
133     // machine instruction.
134     //
135     // The following code implements it. We assume that Loc[0] is the first byte
136     // of a displacement or an immediate field of a valid machine
137     // instruction. That means a ModRM byte is at Loc[-1]. By taking a look at
138     // the byte, we can determine whether the instruction uses the operand as an
139     // absolute address (R_GOT) or a register-relative address (R_GOTPLT).
140     return (loc[-1] & 0xc7) == 0x5 ? R_GOT : R_GOTPLT;
141   case R_386_TLS_GOTIE:
142     return R_GOTPLT;
143   case R_386_GOTOFF:
144     return R_GOTPLTREL;
145   case R_386_TLS_LE:
146     return R_TLS;
147   case R_386_TLS_LE_32:
148     return R_NEG_TLS;
149   case R_386_NONE:
150     return R_NONE;
151   default:
152     error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
153           ") against symbol " + toString(s));
154     return R_NONE;
155   }
156 }
157 
158 RelExpr X86::adjustRelaxExpr(RelType type, const uint8_t *data,
159                              RelExpr expr) const {
160   switch (expr) {
161   default:
162     return expr;
163   case R_RELAX_TLS_GD_TO_IE:
164     return R_RELAX_TLS_GD_TO_IE_GOTPLT;
165   case R_RELAX_TLS_GD_TO_LE:
166     return R_RELAX_TLS_GD_TO_LE_NEG;
167   }
168 }
169 
170 void X86::writeGotPltHeader(uint8_t *buf) const {
171   write32le(buf, mainPart->dynamic->getVA());
172 }
173 
174 void X86::writeGotPlt(uint8_t *buf, const Symbol &s) const {
175   // Entries in .got.plt initially points back to the corresponding
176   // PLT entries with a fixed offset to skip the first instruction.
177   write32le(buf, s.getPltVA() + 6);
178 }
179 
180 void X86::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
181   // An x86 entry is the address of the ifunc resolver function.
182   write32le(buf, s.getVA());
183 }
184 
185 RelType X86::getDynRel(RelType type) const {
186   if (type == R_386_TLS_LE)
187     return R_386_TLS_TPOFF;
188   if (type == R_386_TLS_LE_32)
189     return R_386_TLS_TPOFF32;
190   return type;
191 }
192 
193 void X86::writePltHeader(uint8_t *buf) const {
194   if (config->isPic) {
195     const uint8_t v[] = {
196         0xff, 0xb3, 0x04, 0x00, 0x00, 0x00, // pushl 4(%ebx)
197         0xff, 0xa3, 0x08, 0x00, 0x00, 0x00, // jmp *8(%ebx)
198         0x90, 0x90, 0x90, 0x90              // nop
199     };
200     memcpy(buf, v, sizeof(v));
201     return;
202   }
203 
204   const uint8_t pltData[] = {
205       0xff, 0x35, 0, 0, 0, 0, // pushl (GOTPLT+4)
206       0xff, 0x25, 0, 0, 0, 0, // jmp *(GOTPLT+8)
207       0x90, 0x90, 0x90, 0x90, // nop
208   };
209   memcpy(buf, pltData, sizeof(pltData));
210   uint32_t gotPlt = in.gotPlt->getVA();
211   write32le(buf + 2, gotPlt + 4);
212   write32le(buf + 8, gotPlt + 8);
213 }
214 
215 void X86::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr,
216                    uint64_t pltEntryAddr, int32_t index,
217                    unsigned relOff) const {
218   if (config->isPic) {
219     const uint8_t inst[] = {
220         0xff, 0xa3, 0, 0, 0, 0, // jmp *foo@GOT(%ebx)
221         0x68, 0,    0, 0, 0,    // pushl $reloc_offset
222         0xe9, 0,    0, 0, 0,    // jmp .PLT0@PC
223     };
224     memcpy(buf, inst, sizeof(inst));
225     write32le(buf + 2, gotPltEntryAddr - in.gotPlt->getVA());
226   } else {
227     const uint8_t inst[] = {
228         0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT
229         0x68, 0,    0, 0, 0,    // pushl $reloc_offset
230         0xe9, 0,    0, 0, 0,    // jmp .PLT0@PC
231     };
232     memcpy(buf, inst, sizeof(inst));
233     write32le(buf + 2, gotPltEntryAddr);
234   }
235 
236   write32le(buf + 7, relOff);
237   write32le(buf + 12, -pltHeaderSize - pltEntrySize * index - 16);
238 }
239 
240 int64_t X86::getImplicitAddend(const uint8_t *buf, RelType type) const {
241   switch (type) {
242   case R_386_8:
243   case R_386_PC8:
244     return SignExtend64<8>(*buf);
245   case R_386_16:
246   case R_386_PC16:
247     return SignExtend64<16>(read16le(buf));
248   case R_386_32:
249   case R_386_GOT32:
250   case R_386_GOT32X:
251   case R_386_GOTOFF:
252   case R_386_GOTPC:
253   case R_386_PC32:
254   case R_386_PLT32:
255   case R_386_TLS_LDO_32:
256   case R_386_TLS_LE:
257     return SignExtend64<32>(read32le(buf));
258   default:
259     return 0;
260   }
261 }
262 
263 void X86::relocateOne(uint8_t *loc, RelType type, uint64_t val) const {
264   switch (type) {
265   case R_386_8:
266     // R_386_{PC,}{8,16} are not part of the i386 psABI, but they are
267     // being used for some 16-bit programs such as boot loaders, so
268     // we want to support them.
269     checkIntUInt(loc, val, 8, type);
270     *loc = val;
271     break;
272   case R_386_PC8:
273     checkInt(loc, val, 8, type);
274     *loc = val;
275     break;
276   case R_386_16:
277     checkIntUInt(loc, val, 16, type);
278     write16le(loc, val);
279     break;
280   case R_386_PC16:
281     // R_386_PC16 is normally used with 16 bit code. In that situation
282     // the PC is 16 bits, just like the addend. This means that it can
283     // point from any 16 bit address to any other if the possibility
284     // of wrapping is included.
285     // The only restriction we have to check then is that the destination
286     // address fits in 16 bits. That is impossible to do here. The problem is
287     // that we are passed the final value, which already had the
288     // current location subtracted from it.
289     // We just check that Val fits in 17 bits. This misses some cases, but
290     // should have no false positives.
291     checkInt(loc, val, 17, type);
292     write16le(loc, val);
293     break;
294   case R_386_32:
295   case R_386_GOT32:
296   case R_386_GOT32X:
297   case R_386_GOTOFF:
298   case R_386_GOTPC:
299   case R_386_PC32:
300   case R_386_PLT32:
301   case R_386_RELATIVE:
302   case R_386_TLS_DTPMOD32:
303   case R_386_TLS_DTPOFF32:
304   case R_386_TLS_GD:
305   case R_386_TLS_GOTIE:
306   case R_386_TLS_IE:
307   case R_386_TLS_LDM:
308   case R_386_TLS_LDO_32:
309   case R_386_TLS_LE:
310   case R_386_TLS_LE_32:
311   case R_386_TLS_TPOFF:
312   case R_386_TLS_TPOFF32:
313     checkInt(loc, val, 32, type);
314     write32le(loc, val);
315     break;
316   default:
317     llvm_unreachable("unknown relocation");
318   }
319 }
320 
321 void X86::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const {
322   // Convert
323   //   leal x@tlsgd(, %ebx, 1),
324   //   call __tls_get_addr@plt
325   // to
326   //   movl %gs:0,%eax
327   //   subl $x@ntpoff,%eax
328   const uint8_t inst[] = {
329       0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
330       0x81, 0xe8, 0, 0, 0, 0,             // subl Val(%ebx), %eax
331   };
332   memcpy(loc - 3, inst, sizeof(inst));
333   write32le(loc + 5, val);
334 }
335 
336 void X86::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const {
337   // Convert
338   //   leal x@tlsgd(, %ebx, 1),
339   //   call __tls_get_addr@plt
340   // to
341   //   movl %gs:0, %eax
342   //   addl x@gotntpoff(%ebx), %eax
343   const uint8_t inst[] = {
344       0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
345       0x03, 0x83, 0, 0, 0, 0,             // addl Val(%ebx), %eax
346   };
347   memcpy(loc - 3, inst, sizeof(inst));
348   write32le(loc + 5, val);
349 }
350 
351 // In some conditions, relocations can be optimized to avoid using GOT.
352 // This function does that for Initial Exec to Local Exec case.
353 void X86::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const {
354   // Ulrich's document section 6.2 says that @gotntpoff can
355   // be used with MOVL or ADDL instructions.
356   // @indntpoff is similar to @gotntpoff, but for use in
357   // position dependent code.
358   uint8_t reg = (loc[-1] >> 3) & 7;
359 
360   if (type == R_386_TLS_IE) {
361     if (loc[-1] == 0xa1) {
362       // "movl foo@indntpoff,%eax" -> "movl $foo,%eax"
363       // This case is different from the generic case below because
364       // this is a 5 byte instruction while below is 6 bytes.
365       loc[-1] = 0xb8;
366     } else if (loc[-2] == 0x8b) {
367       // "movl foo@indntpoff,%reg" -> "movl $foo,%reg"
368       loc[-2] = 0xc7;
369       loc[-1] = 0xc0 | reg;
370     } else {
371       // "addl foo@indntpoff,%reg" -> "addl $foo,%reg"
372       loc[-2] = 0x81;
373       loc[-1] = 0xc0 | reg;
374     }
375   } else {
376     assert(type == R_386_TLS_GOTIE);
377     if (loc[-2] == 0x8b) {
378       // "movl foo@gottpoff(%rip),%reg" -> "movl $foo,%reg"
379       loc[-2] = 0xc7;
380       loc[-1] = 0xc0 | reg;
381     } else {
382       // "addl foo@gotntpoff(%rip),%reg" -> "leal foo(%reg),%reg"
383       loc[-2] = 0x8d;
384       loc[-1] = 0x80 | (reg << 3) | reg;
385     }
386   }
387   write32le(loc, val);
388 }
389 
390 void X86::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const {
391   if (type == R_386_TLS_LDO_32) {
392     write32le(loc, val);
393     return;
394   }
395 
396   // Convert
397   //   leal foo(%reg),%eax
398   //   call ___tls_get_addr
399   // to
400   //   movl %gs:0,%eax
401   //   nop
402   //   leal 0(%esi,1),%esi
403   const uint8_t inst[] = {
404       0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
405       0x90,                               // nop
406       0x8d, 0x74, 0x26, 0x00,             // leal 0(%esi,1),%esi
407   };
408   memcpy(loc - 2, inst, sizeof(inst));
409 }
410 
411 namespace {
412 class RetpolinePic : public X86 {
413 public:
414   RetpolinePic();
415   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
416   void writePltHeader(uint8_t *buf) const override;
417   void writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, uint64_t pltEntryAddr,
418                 int32_t index, unsigned relOff) const override;
419 };
420 
421 class RetpolineNoPic : public X86 {
422 public:
423   RetpolineNoPic();
424   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
425   void writePltHeader(uint8_t *buf) const override;
426   void writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, uint64_t pltEntryAddr,
427                 int32_t index, unsigned relOff) const override;
428 };
429 } // namespace
430 
431 RetpolinePic::RetpolinePic() {
432   pltHeaderSize = 48;
433   pltEntrySize = 32;
434 }
435 
436 void RetpolinePic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
437   write32le(buf, s.getPltVA() + 17);
438 }
439 
440 void RetpolinePic::writePltHeader(uint8_t *buf) const {
441   const uint8_t insn[] = {
442       0xff, 0xb3, 4,    0,    0,    0,          // 0:    pushl 4(%ebx)
443       0x50,                                     // 6:    pushl %eax
444       0x8b, 0x83, 8,    0,    0,    0,          // 7:    mov 8(%ebx), %eax
445       0xe8, 0x0e, 0x00, 0x00, 0x00,             // d:    call next
446       0xf3, 0x90,                               // 12: loop: pause
447       0x0f, 0xae, 0xe8,                         // 14:   lfence
448       0xeb, 0xf9,                               // 17:   jmp loop
449       0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19:   int3; .align 16
450       0x89, 0x0c, 0x24,                         // 20: next: mov %ecx, (%esp)
451       0x8b, 0x4c, 0x24, 0x04,                   // 23:   mov 0x4(%esp), %ecx
452       0x89, 0x44, 0x24, 0x04,                   // 27:   mov %eax ,0x4(%esp)
453       0x89, 0xc8,                               // 2b:   mov %ecx, %eax
454       0x59,                                     // 2d:   pop %ecx
455       0xc3,                                     // 2e:   ret
456       0xcc,                                     // 2f:   int3; padding
457   };
458   memcpy(buf, insn, sizeof(insn));
459 }
460 
461 void RetpolinePic::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr,
462                             uint64_t pltEntryAddr, int32_t index,
463                             unsigned relOff) const {
464   const uint8_t insn[] = {
465       0x50,                            // pushl %eax
466       0x8b, 0x83, 0,    0,    0,    0, // mov foo@GOT(%ebx), %eax
467       0xe8, 0,    0,    0,    0,       // call plt+0x20
468       0xe9, 0,    0,    0,    0,       // jmp plt+0x12
469       0x68, 0,    0,    0,    0,       // pushl $reloc_offset
470       0xe9, 0,    0,    0,    0,       // jmp plt+0
471       0xcc, 0xcc, 0xcc, 0xcc, 0xcc,    // int3; padding
472   };
473   memcpy(buf, insn, sizeof(insn));
474 
475   uint32_t ebx = in.gotPlt->getVA();
476   unsigned off = pltHeaderSize + pltEntrySize * index;
477   write32le(buf + 3, gotPltEntryAddr - ebx);
478   write32le(buf + 8, -off - 12 + 32);
479   write32le(buf + 13, -off - 17 + 18);
480   write32le(buf + 18, relOff);
481   write32le(buf + 23, -off - 27);
482 }
483 
484 RetpolineNoPic::RetpolineNoPic() {
485   pltHeaderSize = 48;
486   pltEntrySize = 32;
487 }
488 
489 void RetpolineNoPic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
490   write32le(buf, s.getPltVA() + 16);
491 }
492 
493 void RetpolineNoPic::writePltHeader(uint8_t *buf) const {
494   const uint8_t insn[] = {
495       0xff, 0x35, 0,    0,    0,    0, // 0:    pushl GOTPLT+4
496       0x50,                            // 6:    pushl %eax
497       0xa1, 0,    0,    0,    0,       // 7:    mov GOTPLT+8, %eax
498       0xe8, 0x0f, 0x00, 0x00, 0x00,    // c:    call next
499       0xf3, 0x90,                      // 11: loop: pause
500       0x0f, 0xae, 0xe8,                // 13:   lfence
501       0xeb, 0xf9,                      // 16:   jmp loop
502       0xcc, 0xcc, 0xcc, 0xcc, 0xcc,    // 18:   int3
503       0xcc, 0xcc, 0xcc,                // 1f:   int3; .align 16
504       0x89, 0x0c, 0x24,                // 20: next: mov %ecx, (%esp)
505       0x8b, 0x4c, 0x24, 0x04,          // 23:   mov 0x4(%esp), %ecx
506       0x89, 0x44, 0x24, 0x04,          // 27:   mov %eax ,0x4(%esp)
507       0x89, 0xc8,                      // 2b:   mov %ecx, %eax
508       0x59,                            // 2d:   pop %ecx
509       0xc3,                            // 2e:   ret
510       0xcc,                            // 2f:   int3; padding
511   };
512   memcpy(buf, insn, sizeof(insn));
513 
514   uint32_t gotPlt = in.gotPlt->getVA();
515   write32le(buf + 2, gotPlt + 4);
516   write32le(buf + 8, gotPlt + 8);
517 }
518 
519 void RetpolineNoPic::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr,
520                               uint64_t pltEntryAddr, int32_t index,
521                               unsigned relOff) const {
522   const uint8_t insn[] = {
523       0x50,                         // 0:  pushl %eax
524       0xa1, 0,    0,    0,    0,    // 1:  mov foo_in_GOT, %eax
525       0xe8, 0,    0,    0,    0,    // 6:  call plt+0x20
526       0xe9, 0,    0,    0,    0,    // b:  jmp plt+0x11
527       0x68, 0,    0,    0,    0,    // 10: pushl $reloc_offset
528       0xe9, 0,    0,    0,    0,    // 15: jmp plt+0
529       0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1a: int3; padding
530       0xcc,                         // 1f: int3; padding
531   };
532   memcpy(buf, insn, sizeof(insn));
533 
534   unsigned off = pltHeaderSize + pltEntrySize * index;
535   write32le(buf + 2, gotPltEntryAddr);
536   write32le(buf + 7, -off - 11 + 32);
537   write32le(buf + 12, -off - 16 + 17);
538   write32le(buf + 17, relOff);
539   write32le(buf + 22, -off - 26);
540 }
541 
542 TargetInfo *elf::getX86TargetInfo() {
543   if (config->zRetpolineplt) {
544     if (config->isPic) {
545       static RetpolinePic t;
546       return &t;
547     }
548     static RetpolineNoPic t;
549     return &t;
550   }
551 
552   static X86 t;
553   return &t;
554 }
555