//===- InputChunks.cpp ----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "InputChunks.h"
#include "Config.h"
#include "OutputSegment.h"
#include "WriterUtils.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/LLVM.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/xxhash.h"

#define DEBUG_TYPE "lld"

using namespace llvm;
using namespace llvm::wasm;
using namespace llvm::support::endian;

namespace lld {
StringRef relocTypeToString(uint8_t relocType) {
  switch (relocType) {
#define WASM_RELOC(NAME, REL)                                                  \
  case REL:                                                                    \
    return #NAME;
#include "llvm/BinaryFormat/WasmRelocs.def"
#undef WASM_RELOC
  }
  llvm_unreachable("unknown reloc type");
}

bool relocIs64(uint8_t relocType) {
  switch (relocType) {
  case R_WASM_MEMORY_ADDR_LEB64:
  case R_WASM_MEMORY_ADDR_SLEB64:
  case R_WASM_MEMORY_ADDR_REL_SLEB64:
  case R_WASM_MEMORY_ADDR_I64:
  case R_WASM_TABLE_INDEX_SLEB64:
  case R_WASM_TABLE_INDEX_I64:
  case R_WASM_FUNCTION_OFFSET_I64:
  case R_WASM_TABLE_INDEX_REL_SLEB64:
  case R_WASM_MEMORY_ADDR_TLS_SLEB64:
    return true;
  default:
    return false;
  }
}

std::string toString(const wasm::InputChunk *c) {
  return (toString(c->file) + ":(" + c->getName() + ")").str();
}

namespace wasm {
StringRef InputChunk::getComdatName() const {
  uint32_t index = getComdat();
  if (index == UINT32_MAX)
    return StringRef();
  return file->getWasmObj()->linkingData().Comdats[index];
}

uint32_t InputChunk::getSize() const {
  if (const auto *ms = dyn_cast<SyntheticMergedChunk>(this))
    return ms->builder.getSize();

  if (const auto *f = dyn_cast<InputFunction>(this)) {
    if (config->compressRelocations && f->file) {
      return f->getCompressedSize();
    }
  }

  return data().size();
}

uint32_t InputChunk::getInputSize() const {
  if (const auto *f = dyn_cast<InputFunction>(this))
    return f->function->Size;
  return getSize();
}

// Copy this input chunk to an mmap'ed output file and apply relocations.
void InputChunk::writeTo(uint8_t *buf) const {
  if (const auto *f = dyn_cast<InputFunction>(this)) {
    if (file && config->compressRelocations)
      return f->writeCompressed(buf);
  } else if (const auto *ms = dyn_cast<SyntheticMergedChunk>(this)) {
    ms->builder.write(buf + outSecOff);
    // Apply relocations
    ms->relocate(buf + outSecOff);
    return;
  }

  // Copy contents
  memcpy(buf + outSecOff, data().data(), data().size());

  // Apply relocations
  relocate(buf + outSecOff);
}

void InputChunk::relocate(uint8_t *buf) const {
  if (relocations.empty())
    return;

  LLVM_DEBUG(dbgs() << "applying relocations: " << toString(this)
                    << " count=" << relocations.size() << "\n");
  int32_t inputSectionOffset = getInputSectionOffset();
  uint64_t tombstone = getTombstone();

  for (const WasmRelocation &rel : relocations) {
    uint8_t *loc = buf + rel.Offset - inputSectionOffset;
    LLVM_DEBUG(dbgs() << "apply reloc: type=" << relocTypeToString(rel.Type));
    if (rel.Type != R_WASM_TYPE_INDEX_LEB)
      LLVM_DEBUG(dbgs() << " sym=" << file->getSymbols()[rel.Index]->getName());
    LLVM_DEBUG(dbgs() << " addend=" << rel.Addend << " index=" << rel.Index
                      << " offset=" << rel.Offset << "\n");
    auto value = file->calcNewValue(rel, tombstone, this);

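    // The LEB-style cases below overwrite the target in place using a fixed
    // width: 5 bytes for 32-bit values and 10 for 64-bit ones, matching the
    // padded encodings produced by the compiler (see the comment above
    // calculateSize() below).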
    switch (rel.Type) {
    case R_WASM_TYPE_INDEX_LEB:
    case R_WASM_FUNCTION_INDEX_LEB:
    case R_WASM_GLOBAL_INDEX_LEB:
    case R_WASM_TAG_INDEX_LEB:
    case R_WASM_MEMORY_ADDR_LEB:
    case R_WASM_TABLE_NUMBER_LEB:
      encodeULEB128(value, loc, 5);
      break;
    case R_WASM_MEMORY_ADDR_LEB64:
      encodeULEB128(value, loc, 10);
      break;
    case R_WASM_TABLE_INDEX_SLEB:
    case R_WASM_TABLE_INDEX_REL_SLEB:
    case R_WASM_MEMORY_ADDR_SLEB:
    case R_WASM_MEMORY_ADDR_REL_SLEB:
    case R_WASM_MEMORY_ADDR_TLS_SLEB:
      encodeSLEB128(static_cast<int32_t>(value), loc, 5);
      break;
    case R_WASM_TABLE_INDEX_SLEB64:
    case R_WASM_TABLE_INDEX_REL_SLEB64:
    case R_WASM_MEMORY_ADDR_SLEB64:
    case R_WASM_MEMORY_ADDR_REL_SLEB64:
    case R_WASM_MEMORY_ADDR_TLS_SLEB64:
      encodeSLEB128(static_cast<int64_t>(value), loc, 10);
      break;
    case R_WASM_TABLE_INDEX_I32:
    case R_WASM_MEMORY_ADDR_I32:
    case R_WASM_FUNCTION_OFFSET_I32:
    case R_WASM_SECTION_OFFSET_I32:
    case R_WASM_GLOBAL_INDEX_I32:
    case R_WASM_MEMORY_ADDR_LOCREL_I32:
      write32le(loc, value);
      break;
    case R_WASM_TABLE_INDEX_I64:
    case R_WASM_MEMORY_ADDR_I64:
    case R_WASM_FUNCTION_OFFSET_I64:
      write64le(loc, value);
      break;
    default:
      llvm_unreachable("unknown relocation type");
    }
  }
}

// Copy relocation entries to a given output stream.
// This function is used only when a user passes "-r". For a regular link,
// we consume relocations instead of copying them to an output file.
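// Each record is written as: a ULEB reloc type, a ULEB offset (rebased into
// the output section), a ULEB index, and, for types that carry one, an SLEB
// addend.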
void InputChunk::writeRelocations(raw_ostream &os) const {
  if (relocations.empty())
    return;

  int32_t off = outSecOff - getInputSectionOffset();
  LLVM_DEBUG(dbgs() << "writeRelocations: " << file->getName()
                    << " offset=" << Twine(off) << "\n");

  for (const WasmRelocation &rel : relocations) {
    writeUleb128(os, rel.Type, "reloc type");
    writeUleb128(os, rel.Offset + off, "reloc offset");
    writeUleb128(os, file->calcNewIndex(rel), "reloc index");

    if (relocTypeHasAddend(rel.Type))
      writeSleb128(os, file->calcNewAddend(rel), "reloc addend");
  }
}

uint64_t InputChunk::getTombstone() const {
  if (const auto *s = dyn_cast<InputSection>(this)) {
    return s->tombstoneValue;
  }

  return 0;
}

void InputFunction::setFunctionIndex(uint32_t index) {
  LLVM_DEBUG(dbgs() << "InputFunction::setFunctionIndex: " << getName()
                    << " -> " << index << "\n");
  assert(!hasFunctionIndex());
  functionIndex = index;
}

void InputFunction::setTableIndex(uint32_t index) {
  LLVM_DEBUG(dbgs() << "InputFunction::setTableIndex: " << getName() << " -> "
                    << index << "\n");
  assert(!hasTableIndex());
  tableIndex = index;
}

// Write a relocation value without padding and return the number of bytes
// written.
static unsigned writeCompressedReloc(uint8_t *buf, const WasmRelocation &rel,
                                     uint64_t value) {
  switch (rel.Type) {
  case R_WASM_TYPE_INDEX_LEB:
  case R_WASM_FUNCTION_INDEX_LEB:
  case R_WASM_GLOBAL_INDEX_LEB:
  case R_WASM_TAG_INDEX_LEB:
  case R_WASM_MEMORY_ADDR_LEB:
  case R_WASM_MEMORY_ADDR_LEB64:
  case R_WASM_TABLE_NUMBER_LEB:
    return encodeULEB128(value, buf);
  case R_WASM_TABLE_INDEX_SLEB:
  case R_WASM_TABLE_INDEX_SLEB64:
  case R_WASM_MEMORY_ADDR_SLEB:
  case R_WASM_MEMORY_ADDR_SLEB64:
    return encodeSLEB128(static_cast<int64_t>(value), buf);
  default:
    llvm_unreachable("unexpected relocation type");
  }
}

static unsigned getRelocWidthPadded(const WasmRelocation &rel) {
  switch (rel.Type) {
  case R_WASM_TYPE_INDEX_LEB:
  case R_WASM_FUNCTION_INDEX_LEB:
  case R_WASM_GLOBAL_INDEX_LEB:
  case R_WASM_TAG_INDEX_LEB:
  case R_WASM_MEMORY_ADDR_LEB:
  case R_WASM_TABLE_NUMBER_LEB:
  case R_WASM_TABLE_INDEX_SLEB:
  case R_WASM_MEMORY_ADDR_SLEB:
    return 5;
  case R_WASM_TABLE_INDEX_SLEB64:
  case R_WASM_MEMORY_ADDR_LEB64:
  case R_WASM_MEMORY_ADDR_SLEB64:
    return 10;
  default:
    llvm_unreachable("unexpected relocation type");
  }
}

static unsigned getRelocWidth(const WasmRelocation &rel, uint64_t value) {
  uint8_t buf[10];
  return writeCompressedReloc(buf, rel, value);
}

// Relocations of type LEB and SLEB in the code section are padded to 5 bytes
// so that a fast linker can blindly overwrite them without needing to worry
// about the number of bytes needed to encode the values.
// However, when writing a non-relocatable output, the code section can be
// compressed to remove this padding. In this case we need to perform a size
// calculation based on the value at each relocation. At best we end up
// saving 4 bytes for each relocation entry.
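//
// For example, a function index of 3 appears in a relocatable input as the
// padded 5-byte ULEB128 sequence 0x83 0x80 0x80 0x80 0x00; compression
// re-encodes it as the single byte 0x03, saving 4 bytes. (Illustrative
// values only.)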
//
// This function only computes the final output size. It must be called
// before getSize() is used to calculate the layout of the code section.
void InputFunction::calculateSize() {
  if (!file || !config->compressRelocations)
    return;

  LLVM_DEBUG(dbgs() << "calculateSize: " << getName() << "\n");

  const uint8_t *secStart = file->codeSection->Content.data();
  const uint8_t *funcStart = secStart + getInputSectionOffset();
  uint32_t functionSizeLength;
  // decodeULEB128 stores the number of bytes read, i.e. the width of the
  // function's size field.
  decodeULEB128(funcStart, &functionSizeLength);

  uint32_t start = getInputSectionOffset();
  uint32_t end = start + function->Size;

  uint64_t tombstone = getTombstone();

  uint32_t lastRelocEnd = start + functionSizeLength;
  for (const WasmRelocation &rel : relocations) {
    LLVM_DEBUG(dbgs() << " region: " << (rel.Offset - lastRelocEnd) << "\n");
    compressedFuncSize += rel.Offset - lastRelocEnd;
    compressedFuncSize +=
        getRelocWidth(rel, file->calcNewValue(rel, tombstone, this));
    lastRelocEnd = rel.Offset + getRelocWidthPadded(rel);
  }
  LLVM_DEBUG(dbgs() << " final region: " << (end - lastRelocEnd) << "\n");
  compressedFuncSize += end - lastRelocEnd;

  // Now that we know how long the resulting function is, we can add the
  // encoding of its length.
  uint8_t buf[5];
  compressedSize = compressedFuncSize + encodeULEB128(compressedFuncSize, buf);

  LLVM_DEBUG(dbgs() << " calculateSize orig: " << function->Size << "\n");
  LLVM_DEBUG(dbgs() << " calculateSize new: " << compressedSize << "\n");
}

// Override the default writeTo method so that we can (optionally) write the
// compressed version of the function.
void InputFunction::writeCompressed(uint8_t *buf) const {
  buf += outSecOff;
  uint8_t *orig = buf;
  (void)orig;

  const uint8_t *secStart = file->codeSection->Content.data();
  const uint8_t *funcStart = secStart + getInputSectionOffset();
  const uint8_t *end = funcStart + function->Size;
  uint64_t tombstone = getTombstone();
  uint32_t count;
  // Skip the original size field; decodeULEB128 stores its width in count.
  decodeULEB128(funcStart, &count);
  funcStart += count;

  LLVM_DEBUG(dbgs() << "write func: " << getName() << "\n");
  buf += encodeULEB128(compressedFuncSize, buf);
  const uint8_t *lastRelocEnd = funcStart;
  for (const WasmRelocation &rel : relocations) {
    unsigned chunkSize = (secStart + rel.Offset) - lastRelocEnd;
    LLVM_DEBUG(dbgs() << " write chunk: " << chunkSize << "\n");
    memcpy(buf, lastRelocEnd, chunkSize);
    buf += chunkSize;
    buf += writeCompressedReloc(buf, rel,
                                file->calcNewValue(rel, tombstone, this));
    lastRelocEnd = secStart + rel.Offset + getRelocWidthPadded(rel);
  }

  unsigned chunkSize = end - lastRelocEnd;
  LLVM_DEBUG(dbgs() << " write final chunk: " << chunkSize << "\n");
  memcpy(buf, lastRelocEnd, chunkSize);
  LLVM_DEBUG(dbgs() << " total: " << (buf + chunkSize - orig) << "\n");
}

uint64_t InputChunk::getChunkOffset(uint64_t offset) const {
  if (const auto *ms = dyn_cast<MergeInputChunk>(this)) {
    LLVM_DEBUG(dbgs() << "getChunkOffset(merged): " << getName() << "\n");
    LLVM_DEBUG(dbgs() << "offset: " << offset << "\n");
    LLVM_DEBUG(dbgs() << "parentOffset: " << ms->getParentOffset(offset)
                      << "\n");
    assert(ms->parent);
    return ms->parent->getChunkOffset(ms->getParentOffset(offset));
  }
  return outputSegmentOffset + offset;
}

uint64_t InputChunk::getOffset(uint64_t offset) const {
  return outSecOff + getChunkOffset(offset);
}

uint64_t InputChunk::getVA(uint64_t offset) const {
  return (outputSeg ? outputSeg->startVA : 0) + getChunkOffset(offset);
}

// Generate code to apply relocations to the data section at runtime.
// This is only called when generating shared libraries (PIC), where addresses
// are not known at static link time.
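//
// A sketch of the sequence emitted per relocation (shown as WAT, for the
// non-GOT 32-bit case; __memory_base is replaced by __table_base for table
// index relocations):
//
//   global.get __memory_base  ;; compute the address to patch
//   i32.const <offset>
//   i32.add
//   global.get __memory_base  ;; compute the relocated value
//   i32.const <value>
//   i32.add
//   i32.store                 ;; align immediate 2, offset immediate 0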
void InputChunk::generateRelocationCode(raw_ostream &os) const {
  LLVM_DEBUG(dbgs() << "generating runtime relocations: " << getName()
                    << " count=" << relocations.size() << "\n");

  bool is64 = config->is64.getValueOr(false);
  unsigned opcode_ptr_const = is64 ? WASM_OPCODE_I64_CONST
                                   : WASM_OPCODE_I32_CONST;
  unsigned opcode_ptr_add = is64 ? WASM_OPCODE_I64_ADD
                                 : WASM_OPCODE_I32_ADD;

  uint64_t tombstone = getTombstone();
  // TODO(sbc): Encode the relocations in the data section and write a loop
  // here to apply them.
  for (const WasmRelocation &rel : relocations) {
    uint64_t offset = getVA(rel.Offset) - getInputSectionOffset();

    LLVM_DEBUG(dbgs() << "gen reloc: type=" << relocTypeToString(rel.Type)
                      << " addend=" << rel.Addend << " index=" << rel.Index
                      << " output offset=" << offset << "\n");

    // Get __memory_base
    writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET");
    writeUleb128(os, WasmSym::memoryBase->getGlobalIndex(), "memory_base");

    // Add the offset of the relocation
    writeU8(os, opcode_ptr_const, "CONST");
    writeSleb128(os, offset, "offset");
    writeU8(os, opcode_ptr_add, "ADD");

    bool is64 = relocIs64(rel.Type);
    unsigned opcode_reloc_const =
        is64 ? WASM_OPCODE_I64_CONST : WASM_OPCODE_I32_CONST;
    unsigned opcode_reloc_add =
        is64 ? WASM_OPCODE_I64_ADD : WASM_OPCODE_I32_ADD;
    unsigned opcode_reloc_store =
        is64 ? WASM_OPCODE_I64_STORE : WASM_OPCODE_I32_STORE;

    Symbol *sym = file->getSymbol(rel);
    // Now figure out what we want to store
    if (sym->hasGOTIndex()) {
      writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET");
      writeUleb128(os, sym->getGOTIndex(), "global index");
      if (rel.Addend) {
        writeU8(os, opcode_reloc_const, "CONST");
        writeSleb128(os, rel.Addend, "addend");
        writeU8(os, opcode_reloc_add, "ADD");
      }
    } else {
      const GlobalSymbol *baseSymbol = WasmSym::memoryBase;
      if (rel.Type == R_WASM_TABLE_INDEX_I32 ||
          rel.Type == R_WASM_TABLE_INDEX_I64)
        baseSymbol = WasmSym::tableBase;
      writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET");
      writeUleb128(os, baseSymbol->getGlobalIndex(), "base");
      writeU8(os, opcode_reloc_const, "CONST");
      writeSleb128(os, file->calcNewValue(rel, tombstone, this), "offset");
      writeU8(os, opcode_reloc_add, "ADD");
    }

    // Store that value at the virtual address
    writeU8(os, opcode_reloc_store, "I32_STORE");
    writeUleb128(os, 2, "align");
    writeUleb128(os, 0, "offset");
  }
}

// Split a WASM_SEG_FLAG_STRINGS section. Such a section is a sequence of
// null-terminated strings.
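// For example, the 7-byte sequence "ab\0cde\0" is split into two pieces at
// offsets 0 and 3, each recorded with a hash of its contents for later
// deduplication.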
void MergeInputChunk::splitStrings(ArrayRef<uint8_t> data) {
  LLVM_DEBUG(llvm::dbgs() << "splitStrings\n");
  size_t off = 0;
  StringRef s = toStringRef(data);

  while (!s.empty()) {
    size_t end = s.find(0);
    if (end == StringRef::npos)
      fatal(toString(this) + ": string is not null terminated");
    size_t size = end + 1;

    pieces.emplace_back(off, xxHash64(s.substr(0, size)), true);
    s = s.substr(size);
    off += size;
  }
}

// This function is called after we obtain a complete list of input sections
// that need to be linked. It is responsible for splitting section contents
// into small chunks for further processing.
//
// Note that this function is called from parallelForEach. This must be
// thread-safe (i.e. no memory allocation from the pools).
void MergeInputChunk::splitIntoPieces() {
  assert(pieces.empty());
  // As of now we only support WASM_SEG_FLAG_STRINGS, but in the future we
  // could add other types of splitting (see ELF's splitIntoPieces).
  assert(flags & WASM_SEG_FLAG_STRINGS);
  splitStrings(data());
}

SectionPiece *MergeInputChunk::getSectionPiece(uint64_t offset) {
  if (this->data().size() <= offset)
    fatal(toString(this) + ": offset is outside the section");

  // If the offset is not at the beginning of a section piece, it is not in
  // the map. In that case we need to do a binary search of the original
  // section piece vector.
  auto it = partition_point(
      pieces, [=](SectionPiece p) { return p.inputOff <= offset; });
  return &it[-1];
}

// Returns the offset in an output section for a given input offset.
// Because the contents of a mergeable section are not contiguous in the
// output, this is not just an addition to a base output offset.
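// For example, if the piece containing the given offset starts at input
// offset 8 and was assigned output offset 100, then input offset 10 maps to
// 100 + (10 - 8) = 102.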
uint64_t MergeInputChunk::getParentOffset(uint64_t offset) const {
  // If the offset is not at the beginning of a section piece, it is not in
  // the map. In that case we need to search the original section piece
  // vector.
  const SectionPiece *piece = getSectionPiece(offset);
  uint64_t addend = offset - piece->inputOff;
  return piece->outputOff + addend;
}

void SyntheticMergedChunk::finalizeContents() {
  // Add all string pieces to the string table builder to create section
  // contents.
  for (MergeInputChunk *sec : chunks)
    for (size_t i = 0, e = sec->pieces.size(); i != e; ++i)
      if (sec->pieces[i].live)
        builder.add(sec->getData(i));

  // Fix the string table content. After this, the contents will never change.
  builder.finalize();

  // finalize() fixed tail-optimized strings, so we can now get
  // offsets of strings. Get an offset for each string and save it
  // to a corresponding SectionPiece for easy access.
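  // (With tail optimization, a string that is a suffix of another, e.g. "bc"
  // vs "abc", can be assigned an offset inside the longer string's storage
  // rather than getting its own copy.)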
  for (MergeInputChunk *sec : chunks)
    for (size_t i = 0, e = sec->pieces.size(); i != e; ++i)
      if (sec->pieces[i].live)
        sec->pieces[i].outputOff = builder.getOffset(sec->getData(i));
}

uint64_t InputSection::getTombstoneForSection(StringRef name) {
  // When a function is not live we need to update relocations referring to
  // it. If they occur in DWARF debug symbols, we want to change the pc of
  // the function to -1 to avoid overlapping with a valid range. However, for
  // the debug_ranges and debug_loc sections that would conflict with the
  // existing meaning of -1, so we use -2.
  // Returning 0 means there is no tombstone value for this section, and
  // relocations will just use the addend.
  if (!name.startswith(".debug_"))
    return 0;
  if (name.equals(".debug_ranges") || name.equals(".debug_loc"))
    return UINT64_C(-2);
  return UINT64_C(-1);
}

} // namespace wasm
} // namespace lld