1 //===- X86.cpp ------------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "InputFiles.h" 10 #include "Symbols.h" 11 #include "SyntheticSections.h" 12 #include "Target.h" 13 #include "lld/Common/ErrorHandler.h" 14 #include "llvm/Support/Endian.h" 15 16 using namespace llvm; 17 using namespace llvm::support::endian; 18 using namespace llvm::ELF; 19 using namespace lld; 20 using namespace lld::elf; 21 22 namespace { 23 class X86 : public TargetInfo { 24 public: 25 X86(); 26 int getTlsGdRelaxSkip(RelType type) const override; 27 RelExpr getRelExpr(RelType type, const Symbol &s, 28 const uint8_t *loc) const override; 29 int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override; 30 void writeGotPltHeader(uint8_t *buf) const override; 31 RelType getDynRel(RelType type) const override; 32 void writeGotPlt(uint8_t *buf, const Symbol &s) const override; 33 void writeIgotPlt(uint8_t *buf, const Symbol &s) const override; 34 void writePltHeader(uint8_t *buf) const override; 35 void writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, uint64_t pltEntryAddr, 36 int32_t index, unsigned relOff) const override; 37 void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; 38 39 RelExpr adjustRelaxExpr(RelType type, const uint8_t *data, 40 RelExpr expr) const override; 41 void relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const override; 42 void relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const override; 43 void relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const override; 44 void relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const override; 45 }; 46 } // namespace 47 48 X86::X86() { 49 copyRel = R_386_COPY; 50 gotRel = R_386_GLOB_DAT; 51 noneRel = R_386_NONE; 52 pltRel = R_386_JUMP_SLOT; 53 iRelativeRel = R_386_IRELATIVE; 54 relativeRel = R_386_RELATIVE; 55 symbolicRel = R_386_32; 56 tlsGotRel = R_386_TLS_TPOFF; 57 tlsModuleIndexRel = R_386_TLS_DTPMOD32; 58 tlsOffsetRel = R_386_TLS_DTPOFF32; 59 pltEntrySize = 16; 60 pltHeaderSize = 16; 61 trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3 62 63 // Align to the non-PAE large page size (known as a superpage or huge page). 64 // FreeBSD automatically promotes large, superpage-aligned allocations. 65 defaultImageBase = 0x400000; 66 } 67 68 int X86::getTlsGdRelaxSkip(RelType type) const { 69 return 2; 70 } 71 72 RelExpr X86::getRelExpr(RelType type, const Symbol &s, 73 const uint8_t *loc) const { 74 // There are 4 different TLS variable models with varying degrees of 75 // flexibility and performance. LocalExec and InitialExec models are fast but 76 // less-flexible models. If they are in use, we set DF_STATIC_TLS flag in the 77 // dynamic section to let runtime know about that. 78 if (type == R_386_TLS_LE || type == R_386_TLS_LE_32 || type == R_386_TLS_IE || 79 type == R_386_TLS_GOTIE) 80 config->hasStaticTlsModel = true; 81 82 switch (type) { 83 case R_386_8: 84 case R_386_16: 85 case R_386_32: 86 return R_ABS; 87 case R_386_TLS_LDO_32: 88 return R_DTPREL; 89 case R_386_TLS_GD: 90 return R_TLSGD_GOTPLT; 91 case R_386_TLS_LDM: 92 return R_TLSLD_GOTPLT; 93 case R_386_PLT32: 94 return R_PLT_PC; 95 case R_386_PC8: 96 case R_386_PC16: 97 case R_386_PC32: 98 return R_PC; 99 case R_386_GOTPC: 100 return R_GOTPLTONLY_PC; 101 case R_386_TLS_IE: 102 return R_GOT; 103 case R_386_GOT32: 104 case R_386_GOT32X: 105 // These relocations are arguably mis-designed because their calculations 106 // depend on the instructions they are applied to. This is bad because we 107 // usually don't care about whether the target section contains valid 108 // machine instructions or not. But this is part of the documented ABI, so 109 // we had to implement as the standard requires. 110 // 111 // x86 does not support PC-relative data access. Therefore, in order to 112 // access GOT contents, a GOT address needs to be known at link-time 113 // (which means non-PIC) or compilers have to emit code to get a GOT 114 // address at runtime (which means code is position-independent but 115 // compilers need to emit extra code for each GOT access.) This decision 116 // is made at compile-time. In the latter case, compilers emit code to 117 // load an GOT address to a register, which is usually %ebx. 118 // 119 // So, there are two ways to refer to symbol foo's GOT entry: foo@GOT or 120 // foo@GOT(%ebx). 121 // 122 // foo@GOT is not usable in PIC. If we are creating a PIC output and if we 123 // find such relocation, we should report an error. foo@GOT is resolved to 124 // an *absolute* address of foo's GOT entry, because both GOT address and 125 // foo's offset are known. In other words, it's G + A. 126 // 127 // foo@GOT(%ebx) needs to be resolved to a *relative* offset from a GOT to 128 // foo's GOT entry in the table, because GOT address is not known but foo's 129 // offset in the table is known. It's G + A - GOT. 130 // 131 // It's unfortunate that compilers emit the same relocation for these 132 // different use cases. In order to distinguish them, we have to read a 133 // machine instruction. 134 // 135 // The following code implements it. We assume that Loc[0] is the first byte 136 // of a displacement or an immediate field of a valid machine 137 // instruction. That means a ModRM byte is at Loc[-1]. By taking a look at 138 // the byte, we can determine whether the instruction uses the operand as an 139 // absolute address (R_GOT) or a register-relative address (R_GOTPLT). 140 return (loc[-1] & 0xc7) == 0x5 ? R_GOT : R_GOTPLT; 141 case R_386_TLS_GOTIE: 142 return R_GOTPLT; 143 case R_386_GOTOFF: 144 return R_GOTPLTREL; 145 case R_386_TLS_LE: 146 return R_TLS; 147 case R_386_TLS_LE_32: 148 return R_NEG_TLS; 149 case R_386_NONE: 150 return R_NONE; 151 default: 152 error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) + 153 ") against symbol " + toString(s)); 154 return R_NONE; 155 } 156 } 157 158 RelExpr X86::adjustRelaxExpr(RelType type, const uint8_t *data, 159 RelExpr expr) const { 160 switch (expr) { 161 default: 162 return expr; 163 case R_RELAX_TLS_GD_TO_IE: 164 return R_RELAX_TLS_GD_TO_IE_GOTPLT; 165 case R_RELAX_TLS_GD_TO_LE: 166 return R_RELAX_TLS_GD_TO_LE_NEG; 167 } 168 } 169 170 void X86::writeGotPltHeader(uint8_t *buf) const { 171 write32le(buf, mainPart->dynamic->getVA()); 172 } 173 174 void X86::writeGotPlt(uint8_t *buf, const Symbol &s) const { 175 // Entries in .got.plt initially points back to the corresponding 176 // PLT entries with a fixed offset to skip the first instruction. 177 write32le(buf, s.getPltVA() + 6); 178 } 179 180 void X86::writeIgotPlt(uint8_t *buf, const Symbol &s) const { 181 // An x86 entry is the address of the ifunc resolver function. 182 write32le(buf, s.getVA()); 183 } 184 185 RelType X86::getDynRel(RelType type) const { 186 if (type == R_386_TLS_LE) 187 return R_386_TLS_TPOFF; 188 if (type == R_386_TLS_LE_32) 189 return R_386_TLS_TPOFF32; 190 return type; 191 } 192 193 void X86::writePltHeader(uint8_t *buf) const { 194 if (config->isPic) { 195 const uint8_t v[] = { 196 0xff, 0xb3, 0x04, 0x00, 0x00, 0x00, // pushl 4(%ebx) 197 0xff, 0xa3, 0x08, 0x00, 0x00, 0x00, // jmp *8(%ebx) 198 0x90, 0x90, 0x90, 0x90 // nop 199 }; 200 memcpy(buf, v, sizeof(v)); 201 return; 202 } 203 204 const uint8_t pltData[] = { 205 0xff, 0x35, 0, 0, 0, 0, // pushl (GOTPLT+4) 206 0xff, 0x25, 0, 0, 0, 0, // jmp *(GOTPLT+8) 207 0x90, 0x90, 0x90, 0x90, // nop 208 }; 209 memcpy(buf, pltData, sizeof(pltData)); 210 uint32_t gotPlt = in.gotPlt->getVA(); 211 write32le(buf + 2, gotPlt + 4); 212 write32le(buf + 8, gotPlt + 8); 213 } 214 215 void X86::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, 216 uint64_t pltEntryAddr, int32_t index, 217 unsigned relOff) const { 218 if (config->isPic) { 219 const uint8_t inst[] = { 220 0xff, 0xa3, 0, 0, 0, 0, // jmp *foo@GOT(%ebx) 221 0x68, 0, 0, 0, 0, // pushl $reloc_offset 222 0xe9, 0, 0, 0, 0, // jmp .PLT0@PC 223 }; 224 memcpy(buf, inst, sizeof(inst)); 225 write32le(buf + 2, gotPltEntryAddr - in.gotPlt->getVA()); 226 } else { 227 const uint8_t inst[] = { 228 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT 229 0x68, 0, 0, 0, 0, // pushl $reloc_offset 230 0xe9, 0, 0, 0, 0, // jmp .PLT0@PC 231 }; 232 memcpy(buf, inst, sizeof(inst)); 233 write32le(buf + 2, gotPltEntryAddr); 234 } 235 236 write32le(buf + 7, relOff); 237 write32le(buf + 12, -pltHeaderSize - pltEntrySize * index - 16); 238 } 239 240 int64_t X86::getImplicitAddend(const uint8_t *buf, RelType type) const { 241 switch (type) { 242 case R_386_8: 243 case R_386_PC8: 244 return SignExtend64<8>(*buf); 245 case R_386_16: 246 case R_386_PC16: 247 return SignExtend64<16>(read16le(buf)); 248 case R_386_32: 249 case R_386_GOT32: 250 case R_386_GOT32X: 251 case R_386_GOTOFF: 252 case R_386_GOTPC: 253 case R_386_PC32: 254 case R_386_PLT32: 255 case R_386_TLS_LDO_32: 256 case R_386_TLS_LE: 257 return SignExtend64<32>(read32le(buf)); 258 default: 259 return 0; 260 } 261 } 262 263 void X86::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { 264 switch (type) { 265 case R_386_8: 266 // R_386_{PC,}{8,16} are not part of the i386 psABI, but they are 267 // being used for some 16-bit programs such as boot loaders, so 268 // we want to support them. 269 checkIntUInt(loc, val, 8, type); 270 *loc = val; 271 break; 272 case R_386_PC8: 273 checkInt(loc, val, 8, type); 274 *loc = val; 275 break; 276 case R_386_16: 277 checkIntUInt(loc, val, 16, type); 278 write16le(loc, val); 279 break; 280 case R_386_PC16: 281 // R_386_PC16 is normally used with 16 bit code. In that situation 282 // the PC is 16 bits, just like the addend. This means that it can 283 // point from any 16 bit address to any other if the possibility 284 // of wrapping is included. 285 // The only restriction we have to check then is that the destination 286 // address fits in 16 bits. That is impossible to do here. The problem is 287 // that we are passed the final value, which already had the 288 // current location subtracted from it. 289 // We just check that Val fits in 17 bits. This misses some cases, but 290 // should have no false positives. 291 checkInt(loc, val, 17, type); 292 write16le(loc, val); 293 break; 294 case R_386_32: 295 case R_386_GOT32: 296 case R_386_GOT32X: 297 case R_386_GOTOFF: 298 case R_386_GOTPC: 299 case R_386_PC32: 300 case R_386_PLT32: 301 case R_386_RELATIVE: 302 case R_386_TLS_DTPMOD32: 303 case R_386_TLS_DTPOFF32: 304 case R_386_TLS_GD: 305 case R_386_TLS_GOTIE: 306 case R_386_TLS_IE: 307 case R_386_TLS_LDM: 308 case R_386_TLS_LDO_32: 309 case R_386_TLS_LE: 310 case R_386_TLS_LE_32: 311 case R_386_TLS_TPOFF: 312 case R_386_TLS_TPOFF32: 313 checkInt(loc, val, 32, type); 314 write32le(loc, val); 315 break; 316 default: 317 llvm_unreachable("unknown relocation"); 318 } 319 } 320 321 void X86::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const { 322 // Convert 323 // leal x@tlsgd(, %ebx, 1), 324 // call __tls_get_addr@plt 325 // to 326 // movl %gs:0,%eax 327 // subl $x@ntpoff,%eax 328 const uint8_t inst[] = { 329 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax 330 0x81, 0xe8, 0, 0, 0, 0, // subl Val(%ebx), %eax 331 }; 332 memcpy(loc - 3, inst, sizeof(inst)); 333 write32le(loc + 5, val); 334 } 335 336 void X86::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const { 337 // Convert 338 // leal x@tlsgd(, %ebx, 1), 339 // call __tls_get_addr@plt 340 // to 341 // movl %gs:0, %eax 342 // addl x@gotntpoff(%ebx), %eax 343 const uint8_t inst[] = { 344 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax 345 0x03, 0x83, 0, 0, 0, 0, // addl Val(%ebx), %eax 346 }; 347 memcpy(loc - 3, inst, sizeof(inst)); 348 write32le(loc + 5, val); 349 } 350 351 // In some conditions, relocations can be optimized to avoid using GOT. 352 // This function does that for Initial Exec to Local Exec case. 353 void X86::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const { 354 // Ulrich's document section 6.2 says that @gotntpoff can 355 // be used with MOVL or ADDL instructions. 356 // @indntpoff is similar to @gotntpoff, but for use in 357 // position dependent code. 358 uint8_t reg = (loc[-1] >> 3) & 7; 359 360 if (type == R_386_TLS_IE) { 361 if (loc[-1] == 0xa1) { 362 // "movl foo@indntpoff,%eax" -> "movl $foo,%eax" 363 // This case is different from the generic case below because 364 // this is a 5 byte instruction while below is 6 bytes. 365 loc[-1] = 0xb8; 366 } else if (loc[-2] == 0x8b) { 367 // "movl foo@indntpoff,%reg" -> "movl $foo,%reg" 368 loc[-2] = 0xc7; 369 loc[-1] = 0xc0 | reg; 370 } else { 371 // "addl foo@indntpoff,%reg" -> "addl $foo,%reg" 372 loc[-2] = 0x81; 373 loc[-1] = 0xc0 | reg; 374 } 375 } else { 376 assert(type == R_386_TLS_GOTIE); 377 if (loc[-2] == 0x8b) { 378 // "movl foo@gottpoff(%rip),%reg" -> "movl $foo,%reg" 379 loc[-2] = 0xc7; 380 loc[-1] = 0xc0 | reg; 381 } else { 382 // "addl foo@gotntpoff(%rip),%reg" -> "leal foo(%reg),%reg" 383 loc[-2] = 0x8d; 384 loc[-1] = 0x80 | (reg << 3) | reg; 385 } 386 } 387 write32le(loc, val); 388 } 389 390 void X86::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const { 391 if (type == R_386_TLS_LDO_32) { 392 write32le(loc, val); 393 return; 394 } 395 396 // Convert 397 // leal foo(%reg),%eax 398 // call ___tls_get_addr 399 // to 400 // movl %gs:0,%eax 401 // nop 402 // leal 0(%esi,1),%esi 403 const uint8_t inst[] = { 404 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax 405 0x90, // nop 406 0x8d, 0x74, 0x26, 0x00, // leal 0(%esi,1),%esi 407 }; 408 memcpy(loc - 2, inst, sizeof(inst)); 409 } 410 411 namespace { 412 class RetpolinePic : public X86 { 413 public: 414 RetpolinePic(); 415 void writeGotPlt(uint8_t *buf, const Symbol &s) const override; 416 void writePltHeader(uint8_t *buf) const override; 417 void writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, uint64_t pltEntryAddr, 418 int32_t index, unsigned relOff) const override; 419 }; 420 421 class RetpolineNoPic : public X86 { 422 public: 423 RetpolineNoPic(); 424 void writeGotPlt(uint8_t *buf, const Symbol &s) const override; 425 void writePltHeader(uint8_t *buf) const override; 426 void writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, uint64_t pltEntryAddr, 427 int32_t index, unsigned relOff) const override; 428 }; 429 } // namespace 430 431 RetpolinePic::RetpolinePic() { 432 pltHeaderSize = 48; 433 pltEntrySize = 32; 434 } 435 436 void RetpolinePic::writeGotPlt(uint8_t *buf, const Symbol &s) const { 437 write32le(buf, s.getPltVA() + 17); 438 } 439 440 void RetpolinePic::writePltHeader(uint8_t *buf) const { 441 const uint8_t insn[] = { 442 0xff, 0xb3, 4, 0, 0, 0, // 0: pushl 4(%ebx) 443 0x50, // 6: pushl %eax 444 0x8b, 0x83, 8, 0, 0, 0, // 7: mov 8(%ebx), %eax 445 0xe8, 0x0e, 0x00, 0x00, 0x00, // d: call next 446 0xf3, 0x90, // 12: loop: pause 447 0x0f, 0xae, 0xe8, // 14: lfence 448 0xeb, 0xf9, // 17: jmp loop 449 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19: int3; .align 16 450 0x89, 0x0c, 0x24, // 20: next: mov %ecx, (%esp) 451 0x8b, 0x4c, 0x24, 0x04, // 23: mov 0x4(%esp), %ecx 452 0x89, 0x44, 0x24, 0x04, // 27: mov %eax ,0x4(%esp) 453 0x89, 0xc8, // 2b: mov %ecx, %eax 454 0x59, // 2d: pop %ecx 455 0xc3, // 2e: ret 456 0xcc, // 2f: int3; padding 457 }; 458 memcpy(buf, insn, sizeof(insn)); 459 } 460 461 void RetpolinePic::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, 462 uint64_t pltEntryAddr, int32_t index, 463 unsigned relOff) const { 464 const uint8_t insn[] = { 465 0x50, // pushl %eax 466 0x8b, 0x83, 0, 0, 0, 0, // mov foo@GOT(%ebx), %eax 467 0xe8, 0, 0, 0, 0, // call plt+0x20 468 0xe9, 0, 0, 0, 0, // jmp plt+0x12 469 0x68, 0, 0, 0, 0, // pushl $reloc_offset 470 0xe9, 0, 0, 0, 0, // jmp plt+0 471 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // int3; padding 472 }; 473 memcpy(buf, insn, sizeof(insn)); 474 475 uint32_t ebx = in.gotPlt->getVA(); 476 unsigned off = pltHeaderSize + pltEntrySize * index; 477 write32le(buf + 3, gotPltEntryAddr - ebx); 478 write32le(buf + 8, -off - 12 + 32); 479 write32le(buf + 13, -off - 17 + 18); 480 write32le(buf + 18, relOff); 481 write32le(buf + 23, -off - 27); 482 } 483 484 RetpolineNoPic::RetpolineNoPic() { 485 pltHeaderSize = 48; 486 pltEntrySize = 32; 487 } 488 489 void RetpolineNoPic::writeGotPlt(uint8_t *buf, const Symbol &s) const { 490 write32le(buf, s.getPltVA() + 16); 491 } 492 493 void RetpolineNoPic::writePltHeader(uint8_t *buf) const { 494 const uint8_t insn[] = { 495 0xff, 0x35, 0, 0, 0, 0, // 0: pushl GOTPLT+4 496 0x50, // 6: pushl %eax 497 0xa1, 0, 0, 0, 0, // 7: mov GOTPLT+8, %eax 498 0xe8, 0x0f, 0x00, 0x00, 0x00, // c: call next 499 0xf3, 0x90, // 11: loop: pause 500 0x0f, 0xae, 0xe8, // 13: lfence 501 0xeb, 0xf9, // 16: jmp loop 502 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 18: int3 503 0xcc, 0xcc, 0xcc, // 1f: int3; .align 16 504 0x89, 0x0c, 0x24, // 20: next: mov %ecx, (%esp) 505 0x8b, 0x4c, 0x24, 0x04, // 23: mov 0x4(%esp), %ecx 506 0x89, 0x44, 0x24, 0x04, // 27: mov %eax ,0x4(%esp) 507 0x89, 0xc8, // 2b: mov %ecx, %eax 508 0x59, // 2d: pop %ecx 509 0xc3, // 2e: ret 510 0xcc, // 2f: int3; padding 511 }; 512 memcpy(buf, insn, sizeof(insn)); 513 514 uint32_t gotPlt = in.gotPlt->getVA(); 515 write32le(buf + 2, gotPlt + 4); 516 write32le(buf + 8, gotPlt + 8); 517 } 518 519 void RetpolineNoPic::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, 520 uint64_t pltEntryAddr, int32_t index, 521 unsigned relOff) const { 522 const uint8_t insn[] = { 523 0x50, // 0: pushl %eax 524 0xa1, 0, 0, 0, 0, // 1: mov foo_in_GOT, %eax 525 0xe8, 0, 0, 0, 0, // 6: call plt+0x20 526 0xe9, 0, 0, 0, 0, // b: jmp plt+0x11 527 0x68, 0, 0, 0, 0, // 10: pushl $reloc_offset 528 0xe9, 0, 0, 0, 0, // 15: jmp plt+0 529 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1a: int3; padding 530 0xcc, // 1f: int3; padding 531 }; 532 memcpy(buf, insn, sizeof(insn)); 533 534 unsigned off = pltHeaderSize + pltEntrySize * index; 535 write32le(buf + 2, gotPltEntryAddr); 536 write32le(buf + 7, -off - 11 + 32); 537 write32le(buf + 12, -off - 16 + 17); 538 write32le(buf + 17, relOff); 539 write32le(buf + 22, -off - 26); 540 } 541 542 TargetInfo *elf::getX86TargetInfo() { 543 if (config->zRetpolineplt) { 544 if (config->isPic) { 545 static RetpolinePic t; 546 return &t; 547 } 548 static RetpolineNoPic t; 549 return &t; 550 } 551 552 static X86 t; 553 return &t; 554 } 555