1 //===- InputChunks.h --------------------------------------------*- C++ -*-===//
2 //
3 //                             The LLVM Linker
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
// An InputChunk represents an indivisible opaque region of an input wasm file,
// i.e. a single wasm data segment or a single wasm function.
12 //
13 // They are written directly to the mmap'd output file after which relocations
14 // are applied.  Because each Chunk is independent they can be written in
15 // parallel.
16 //
// Chunks are also the unit on which garbage collection (--gc-sections)
// operates.
18 //
19 //===----------------------------------------------------------------------===//
20 
21 #ifndef LLD_WASM_INPUT_CHUNKS_H
22 #define LLD_WASM_INPUT_CHUNKS_H
23 
24 #include "Config.h"
25 #include "InputFiles.h"
26 #include "lld/Common/ErrorHandler.h"
27 #include "lld/Common/LLVM.h"
28 #include "llvm/Object/Wasm.h"
29 
30 namespace lld {
31 namespace wasm {
32 
33 class ObjFile;
34 class OutputSegment;
35 
36 class InputChunk {
37 public:
38   enum Kind { DataSegment, Function, SyntheticFunction, Section };
39 
kind()40   Kind kind() const { return SectionKind; }
41 
getSize()42   virtual uint32_t getSize() const { return data().size(); }
getInputSize()43   virtual uint32_t getInputSize() const { return getSize(); };
44 
45   virtual void writeTo(uint8_t *SectionStart) const;
46 
getRelocations()47   ArrayRef<WasmRelocation> getRelocations() const { return Relocations; }
setRelocations(ArrayRef<WasmRelocation> Rs)48   void setRelocations(ArrayRef<WasmRelocation> Rs) { Relocations = Rs; }
49 
50   virtual StringRef getName() const = 0;
51   virtual StringRef getDebugName() const = 0;
52   virtual uint32_t getComdat() const = 0;
53   StringRef getComdatName() const;
54   virtual uint32_t getInputSectionOffset() const = 0;
55 
NumRelocations()56   size_t NumRelocations() const { return Relocations.size(); }
57   void writeRelocations(llvm::raw_ostream &OS) const;
58 
59   ObjFile *File;
60   int32_t OutputOffset = 0;
61 
62   // Signals that the section is part of the output.  The garbage collector,
63   // and COMDAT handling can set a sections' Live bit.
64   // If GC is disabled, all sections start out as live by default.
65   unsigned Live : 1;
66 
67 protected:
InputChunk(ObjFile * F,Kind K)68   InputChunk(ObjFile *F, Kind K)
69       : File(F), Live(!Config->GcSections), SectionKind(K) {}
70   virtual ~InputChunk() = default;
71   virtual ArrayRef<uint8_t> data() const = 0;
72 
73   // Verifies the existing data at relocation targets matches our expectations.
74   // This is performed only debug builds as an extra sanity check.
75   void verifyRelocTargets() const;
76 
77   ArrayRef<WasmRelocation> Relocations;
78   Kind SectionKind;
79 };
80 
// Represents a WebAssembly data segment which can be included as part of
// an output data segment.  Note that WebAssembly, unlike ELF and other
// formats, uses the term "data segment" to refer to the continuous regions of
// memory that make up the data section. See:
85 // https://webassembly.github.io/spec/syntax/modules.html#syntax-data
86 //
87 // For example, by default, clang will produce a separate data section for
88 // each global variable.
89 class InputSegment : public InputChunk {
90 public:
InputSegment(const WasmSegment & Seg,ObjFile * F)91   InputSegment(const WasmSegment &Seg, ObjFile *F)
92       : InputChunk(F, InputChunk::DataSegment), Segment(Seg) {}
93 
classof(const InputChunk * C)94   static bool classof(const InputChunk *C) { return C->kind() == DataSegment; }
95 
getAlignment()96   uint32_t getAlignment() const { return Segment.Data.Alignment; }
getName()97   StringRef getName() const override { return Segment.Data.Name; }
getDebugName()98   StringRef getDebugName() const override { return StringRef(); }
getComdat()99   uint32_t getComdat() const override { return Segment.Data.Comdat; }
getInputSectionOffset()100   uint32_t getInputSectionOffset() const override {
101     return Segment.SectionOffset;
102   }
103 
104   const OutputSegment *OutputSeg = nullptr;
105   int32_t OutputSegmentOffset = 0;
106 
107 protected:
data()108   ArrayRef<uint8_t> data() const override { return Segment.Data.Content; }
109 
110   const WasmSegment &Segment;
111 };
112 
// Represents a single wasm function within an input file.  These are
// combined to create the final output CODE section.
115 class InputFunction : public InputChunk {
116 public:
InputFunction(const WasmSignature & S,const WasmFunction * Func,ObjFile * F)117   InputFunction(const WasmSignature &S, const WasmFunction *Func, ObjFile *F)
118       : InputChunk(F, InputChunk::Function), Signature(S), Function(Func) {}
119 
classof(const InputChunk * C)120   static bool classof(const InputChunk *C) {
121     return C->kind() == InputChunk::Function ||
122            C->kind() == InputChunk::SyntheticFunction;
123   }
124 
125   void writeTo(uint8_t *SectionStart) const override;
getName()126   StringRef getName() const override { return Function->SymbolName; }
getDebugName()127   StringRef getDebugName() const override { return Function->DebugName; }
getComdat()128   uint32_t getComdat() const override { return Function->Comdat; }
getFunctionInputOffset()129   uint32_t getFunctionInputOffset() const { return getInputSectionOffset(); }
getFunctionCodeOffset()130   uint32_t getFunctionCodeOffset() const { return Function->CodeOffset; }
getSize()131   uint32_t getSize() const override {
132     if (Config->CompressRelocations && File) {
133       assert(CompressedSize);
134       return CompressedSize;
135     }
136     return data().size();
137   }
getInputSize()138   uint32_t getInputSize() const override { return Function->Size; }
getFunctionIndex()139   uint32_t getFunctionIndex() const { return FunctionIndex.getValue(); }
hasFunctionIndex()140   bool hasFunctionIndex() const { return FunctionIndex.hasValue(); }
141   void setFunctionIndex(uint32_t Index);
getInputSectionOffset()142   uint32_t getInputSectionOffset() const override {
143     return Function->CodeSectionOffset;
144   }
getTableIndex()145   uint32_t getTableIndex() const { return TableIndex.getValue(); }
hasTableIndex()146   bool hasTableIndex() const { return TableIndex.hasValue(); }
147   void setTableIndex(uint32_t Index);
148 
149   // The size of a given input function can depend on the values of the
150   // LEB relocations within it.  This finalizeContents method is called after
151   // all the symbol values have be calcualted but before getSize() is ever
152   // called.
153   void calculateSize();
154 
155   const WasmSignature &Signature;
156 
157 protected:
data()158   ArrayRef<uint8_t> data() const override {
159     assert(!Config->CompressRelocations);
160     return File->CodeSection->Content.slice(getInputSectionOffset(),
161                                             Function->Size);
162   }
163 
164   const WasmFunction *Function;
165   llvm::Optional<uint32_t> FunctionIndex;
166   llvm::Optional<uint32_t> TableIndex;
167   uint32_t CompressedFuncSize = 0;
168   uint32_t CompressedSize = 0;
169 };
170 
171 class SyntheticFunction : public InputFunction {
172 public:
173   SyntheticFunction(const WasmSignature &S, StringRef Name,
174                     StringRef DebugName = {})
InputFunction(S,nullptr,nullptr)175       : InputFunction(S, nullptr, nullptr), Name(Name), DebugName(DebugName) {
176     SectionKind = InputChunk::SyntheticFunction;
177   }
178 
classof(const InputChunk * C)179   static bool classof(const InputChunk *C) {
180     return C->kind() == InputChunk::SyntheticFunction;
181   }
182 
getName()183   StringRef getName() const override { return Name; }
getDebugName()184   StringRef getDebugName() const override { return DebugName; }
getComdat()185   uint32_t getComdat() const override { return UINT32_MAX; }
186 
setBody(ArrayRef<uint8_t> Body_)187   void setBody(ArrayRef<uint8_t> Body_) { Body = Body_; }
188 
189 protected:
data()190   ArrayRef<uint8_t> data() const override { return Body; }
191 
192   StringRef Name;
193   StringRef DebugName;
194   ArrayRef<uint8_t> Body;
195 };
196 
197 // Represents a single Wasm Section within an input file.
198 class InputSection : public InputChunk {
199 public:
InputSection(const WasmSection & S,ObjFile * F)200   InputSection(const WasmSection &S, ObjFile *F)
201       : InputChunk(F, InputChunk::Section), Section(S) {
202     assert(Section.Type == llvm::wasm::WASM_SEC_CUSTOM);
203   }
204 
getName()205   StringRef getName() const override { return Section.Name; }
getDebugName()206   StringRef getDebugName() const override { return StringRef(); }
getComdat()207   uint32_t getComdat() const override { return UINT32_MAX; }
208 
209 protected:
data()210   ArrayRef<uint8_t> data() const override { return Section.Content; }
211 
212   // Offset within the input section.  This is only zero since this chunk
213   // type represents an entire input section, not part of one.
getInputSectionOffset()214   uint32_t getInputSectionOffset() const override { return 0; }
215 
216   const WasmSection &Section;
217 };
218 
219 } // namespace wasm
220 
221 std::string toString(const wasm::InputChunk *);
222 } // namespace lld
223 
224 #endif // LLD_WASM_INPUT_CHUNKS_H
225