1 //===- LinkerScript.h -------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_ELF_LINKER_SCRIPT_H
10 #define LLD_ELF_LINKER_SCRIPT_H
11 
#include "Config.h"
#include "Writer.h"
#include "lld/Common/LLVM.h"
#include "lld/Common/Strings.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringRef.h"
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <string>
#include <utility>
24 
25 namespace lld {
26 namespace elf {
27 
28 class Defined;
29 class InputFile;
30 class InputSection;
31 class InputSectionBase;
32 class OutputSection;
33 class SectionBase;
34 class ThunkSection;
35 struct OutputDesc;
36 
37 // This represents an r-value in the linker script.
38 struct ExprValue {
39   ExprValue(SectionBase *sec, bool forceAbsolute, uint64_t val,
40             const Twine &loc)
41       : sec(sec), val(val), forceAbsolute(forceAbsolute), loc(loc.str()) {}
42 
43   ExprValue(uint64_t val) : ExprValue(nullptr, false, val, "") {}
44 
45   bool isAbsolute() const { return forceAbsolute || sec == nullptr; }
46   uint64_t getValue() const;
47   uint64_t getSecAddr() const;
48   uint64_t getSectionOffset() const;
49 
50   // If a value is relative to a section, it has a non-null Sec.
51   SectionBase *sec;
52 
53   uint64_t val;
54   uint64_t alignment = 1;
55 
56   // The original st_type if the expression represents a symbol. Any operation
57   // resets type to STT_NOTYPE.
58   uint8_t type = llvm::ELF::STT_NOTYPE;
59 
60   // True if this expression is enclosed in ABSOLUTE().
61   // This flag affects the return value of getValue().
62   bool forceAbsolute;
63 
64   // Original source location. Used for error messages.
65   std::string loc;
66 };
67 
// This represents an expression in the linker script, stored as a callable
// that takes no arguments and yields an ExprValue.
// ScriptParser::readExpr reads an expression and returns an Expr.
// Later, we evaluate the expression by calling the function.
using Expr = std::function<ExprValue()>;
72 
// Tags for the kinds of commands used to implement the linker script
// SECTIONS command.
// https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS
enum SectionsCommandKind {
  // ". = expr" or "<sym> = expr".
  AssignmentKind,
  OutputSectionKind,
  InputSectionKind,
  // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr).
  ByteKind,
};
81 
// Common base of the command structs. `kind` stores the tag that the derived
// types' classof() functions compare against.
struct SectionCommand {
  SectionCommand(int kindTag) : kind{kindTag} {}
  int kind;
};
86 
87 // This represents ". = <expr>" or "<symbol> = <expr>".
88 struct SymbolAssignment : SectionCommand {
89   SymbolAssignment(StringRef name, Expr e, std::string loc)
90       : SectionCommand(AssignmentKind), name(name), expression(e),
91         location(loc) {}
92 
93   static bool classof(const SectionCommand *c) {
94     return c->kind == AssignmentKind;
95   }
96 
97   // The LHS of an expression. Name is either a symbol name or ".".
98   StringRef name;
99   Defined *sym = nullptr;
100 
101   // The RHS of an expression.
102   Expr expression;
103 
104   // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN.
105   bool provide = false;
106   bool hidden = false;
107 
108   // Holds file name and line number for error reporting.
109   std::string location;
110 
111   // A string representation of this command. We use this for -Map.
112   std::string commandString;
113 
114   // Address of this assignment command.
115   uint64_t addr;
116 
117   // Size of this assignment command. This is usually 0, but if
118   // you move '.' this may be greater than 0.
119   uint64_t size;
120 };
121 
// Linker scripts allow additional constraints to be put on output sections.
// An output section marked ONLY_IF_RO is created only if its input sections
// are read-only; likewise, one marked ONLY_IF_RW is created if all input
// sections are RW.
enum class ConstraintKind {
  NoConstraint,
  ReadOnly,
  ReadWrite,
};
127 
128 // This struct is used to represent the location and size of regions of
129 // target memory. Instances of the struct are created by parsing the
130 // MEMORY command.
131 struct MemoryRegion {
132   MemoryRegion(StringRef name, Expr origin, Expr length, uint32_t flags,
133                uint32_t invFlags, uint32_t negFlags, uint32_t negInvFlags)
134       : name(std::string(name)), origin(origin), length(length), flags(flags),
135         invFlags(invFlags), negFlags(negFlags), negInvFlags(negInvFlags) {}
136 
137   std::string name;
138   Expr origin;
139   Expr length;
140   // A section can be assigned to the region if any of these ELF section flags
141   // are set...
142   uint32_t flags;
143   // ... or any of these flags are not set.
144   // For example, the memory region attribute "r" maps to SHF_WRITE.
145   uint32_t invFlags;
146   // A section cannot be assigned to the region if any of these ELF section
147   // flags are set...
148   uint32_t negFlags;
149   // ... or any of these flags are not set.
150   // For example, the memory region attribute "!r" maps to SHF_WRITE.
151   uint32_t negInvFlags;
152   uint64_t curPos = 0;
153 
154   bool compatibleWith(uint32_t secFlags) const {
155     if ((secFlags & negFlags) || (~secFlags & negInvFlags))
156       return false;
157     return (secFlags & flags) || (~secFlags & invFlags);
158   }
159 };
160 
161 // This struct represents one section match pattern in SECTIONS() command.
162 // It can optionally have negative match pattern for EXCLUDED_FILE command.
163 // Also it may be surrounded with SORT() command, so contains sorting rules.
164 class SectionPattern {
165   StringMatcher excludedFilePat;
166 
167   // Cache of the most recent input argument and result of excludesFile().
168   mutable llvm::Optional<std::pair<const InputFile *, bool>> excludesFileCache;
169 
170 public:
171   SectionPattern(StringMatcher &&pat1, StringMatcher &&pat2)
172       : excludedFilePat(pat1), sectionPat(pat2),
173         sortOuter(SortSectionPolicy::Default),
174         sortInner(SortSectionPolicy::Default) {}
175 
176   bool excludesFile(const InputFile *file) const;
177 
178   StringMatcher sectionPat;
179   SortSectionPolicy sortOuter;
180   SortSectionPolicy sortInner;
181 };
182 
183 class InputSectionDescription : public SectionCommand {
184   SingleStringMatcher filePat;
185 
186   // Cache of the most recent input argument and result of matchesFile().
187   mutable llvm::Optional<std::pair<const InputFile *, bool>> matchesFileCache;
188 
189 public:
190   InputSectionDescription(StringRef filePattern, uint64_t withFlags = 0,
191                           uint64_t withoutFlags = 0)
192       : SectionCommand(InputSectionKind), filePat(filePattern),
193         withFlags(withFlags), withoutFlags(withoutFlags) {}
194 
195   static bool classof(const SectionCommand *c) {
196     return c->kind == InputSectionKind;
197   }
198 
199   bool matchesFile(const InputFile *file) const;
200 
201   // Input sections that matches at least one of SectionPatterns
202   // will be associated with this InputSectionDescription.
203   SmallVector<SectionPattern, 0> sectionPatterns;
204 
205   // Includes InputSections and MergeInputSections. Used temporarily during
206   // assignment of input sections to output sections.
207   SmallVector<InputSectionBase *, 0> sectionBases;
208 
209   // Used after the finalizeInputSections() pass. MergeInputSections have been
210   // merged into MergeSyntheticSections.
211   SmallVector<InputSection *, 0> sections;
212 
213   // Temporary record of synthetic ThunkSection instances and the pass that
214   // they were created in. This is used to insert newly created ThunkSections
215   // into Sections at the end of a createThunks() pass.
216   SmallVector<std::pair<ThunkSection *, uint32_t>, 0> thunkSections;
217 
218   // SectionPatterns can be filtered with the INPUT_SECTION_FLAGS command.
219   uint64_t withFlags;
220   uint64_t withoutFlags;
221 };
222 
223 // Represents BYTE(), SHORT(), LONG(), or QUAD().
224 struct ByteCommand : SectionCommand {
225   ByteCommand(Expr e, unsigned size, std::string commandString)
226       : SectionCommand(ByteKind), commandString(commandString), expression(e),
227         size(size) {}
228 
229   static bool classof(const SectionCommand *c) { return c->kind == ByteKind; }
230 
231   // Keeps string representing the command. Used for -Map" is perhaps better.
232   std::string commandString;
233 
234   Expr expression;
235 
236   // This is just an offset of this assignment command in the output section.
237   unsigned offset;
238 
239   // Size of this data command.
240   unsigned size;
241 };
242 
243 struct InsertCommand {
244   SmallVector<StringRef, 0> names;
245   bool isAfter;
246   StringRef where;
247 };
248 
// One entry of the PHDRS command list (see LinkerScript::phdrsCommands).
struct PhdrsCommand {
  StringRef name;
  // Defaults to PT_NULL until a type is set.
  unsigned type = llvm::ELF::PT_NULL;
  bool hasFilehdr = false;
  bool hasPhdrs = false;
  // Empty unless flags were given.
  llvm::Optional<unsigned> flags;
  // Empty (null) unless an LMA expression was given.
  Expr lmaExpr = nullptr;
};
257 
258 class LinkerScript final {
259   // Temporary state used in processSectionCommands() and assignAddresses()
260   // that must be reinitialized for each call to the above functions, and must
261   // not be used outside of the scope of a call to the above functions.
262   struct AddressState {
263     AddressState();
264     OutputSection *outSec = nullptr;
265     MemoryRegion *memRegion = nullptr;
266     MemoryRegion *lmaRegion = nullptr;
267     uint64_t lmaOffset = 0;
268     uint64_t tbssAddr = 0;
269   };
270 
271   llvm::DenseMap<llvm::CachedHashStringRef, OutputDesc *> nameToOutputSection;
272 
273   void addSymbol(SymbolAssignment *cmd);
274   void assignSymbol(SymbolAssignment *cmd, bool inSec);
275   void setDot(Expr e, const Twine &loc, bool inSec);
276   void expandOutputSection(uint64_t size);
277   void expandMemoryRegions(uint64_t size);
278 
279   SmallVector<InputSectionBase *, 0>
280   computeInputSections(const InputSectionDescription *,
281                        ArrayRef<InputSectionBase *>);
282 
283   SmallVector<InputSectionBase *, 0> createInputSectionList(OutputSection &cmd);
284 
285   void discardSynthetic(OutputSection &);
286 
287   SmallVector<size_t, 0> getPhdrIndices(OutputSection *sec);
288 
289   std::pair<MemoryRegion *, MemoryRegion *>
290   findMemoryRegion(OutputSection *sec, MemoryRegion *hint);
291 
292   void assignOffsets(OutputSection *sec);
293 
294   // Ctx captures the local AddressState and makes it accessible
295   // deliberately. This is needed as there are some cases where we cannot just
296   // thread the current state through to a lambda function created by the
297   // script parser.
298   // This should remain a plain pointer as its lifetime is smaller than
299   // LinkerScript.
300   AddressState *ctx = nullptr;
301 
302   OutputSection *aether;
303 
304   uint64_t dot;
305 
306 public:
307   OutputDesc *createOutputSection(StringRef name, StringRef location);
308   OutputDesc *getOrCreateOutputSection(StringRef name);
309 
310   bool hasPhdrsCommands() { return !phdrsCommands.empty(); }
311   uint64_t getDot() { return dot; }
312   void discard(InputSectionBase &s);
313 
314   ExprValue getSymbolValue(StringRef name, const Twine &loc);
315 
316   void addOrphanSections();
317   void diagnoseOrphanHandling() const;
318   void adjustOutputSections();
319   void adjustSectionsAfterSorting();
320 
321   SmallVector<PhdrEntry *, 0> createPhdrs();
322   bool needsInterpSection();
323 
324   bool shouldKeep(InputSectionBase *s);
325   const Defined *assignAddresses();
326   void allocateHeaders(SmallVector<PhdrEntry *, 0> &phdrs);
327   void processSectionCommands();
328   void processSymbolAssignments();
329   void declareSymbols();
330 
331   bool isDiscarded(const OutputSection *sec) const;
332 
333   // Used to handle INSERT AFTER statements.
334   void processInsertCommands();
335 
336   // SECTIONS command list.
337   SmallVector<SectionCommand *, 0> sectionCommands;
338 
339   // PHDRS command list.
340   SmallVector<PhdrsCommand, 0> phdrsCommands;
341 
342   bool hasSectionsCommand = false;
343   bool errorOnMissingSection = false;
344 
345   // List of section patterns specified with KEEP commands. They will
346   // be kept even if they are unused and --gc-sections is specified.
347   SmallVector<InputSectionDescription *, 0> keptSections;
348 
349   // A map from memory region name to a memory region descriptor.
350   llvm::MapVector<llvm::StringRef, MemoryRegion *> memoryRegions;
351 
352   // A list of symbols referenced by the script.
353   SmallVector<llvm::StringRef, 0> referencedSymbols;
354 
355   // Used to implement INSERT [AFTER|BEFORE]. Contains output sections that need
356   // to be reordered.
357   SmallVector<InsertCommand, 0> insertCommands;
358 
359   // OutputSections specified by OVERWRITE_SECTIONS.
360   SmallVector<OutputDesc *, 0> overwriteSections;
361 
362   // Sections that will be warned/errored by --orphan-handling.
363   SmallVector<const InputSectionBase *, 0> orphanSections;
364 };
365 
// The LinkerScript instance, held through a std::unique_ptr. This is only a
// declaration; the object itself is defined outside this header.
extern std::unique_ptr<LinkerScript> script;
367 
368 } // end namespace elf
369 } // end namespace lld
370 
371 #endif // LLD_ELF_LINKER_SCRIPT_H
372