1 //===- LinkerScript.h -------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_ELF_LINKER_SCRIPT_H
10 #define LLD_ELF_LINKER_SCRIPT_H
11 
#include "Config.h"
#include "Writer.h"
#include "lld/Common/LLVM.h"
#include "lld/Common/Strings.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Compiler.h"
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <optional>
#include <string>
#include <utility>
25 
26 namespace lld::elf {
27 
28 class Defined;
29 class InputFile;
30 class InputSection;
31 class InputSectionBase;
32 class OutputSection;
33 class SectionBase;
34 class ThunkSection;
35 struct OutputDesc;
36 
37 // This represents an r-value in the linker script.
38 struct ExprValue {
39   ExprValue(SectionBase *sec, bool forceAbsolute, uint64_t val,
40             const Twine &loc)
41       : sec(sec), val(val), forceAbsolute(forceAbsolute), loc(loc.str()) {}
42 
43   ExprValue(uint64_t val) : ExprValue(nullptr, false, val, "") {}
44 
45   bool isAbsolute() const { return forceAbsolute || sec == nullptr; }
46   uint64_t getValue() const;
47   uint64_t getSecAddr() const;
48   uint64_t getSectionOffset() const;
49 
50   // If a value is relative to a section, it has a non-null Sec.
51   SectionBase *sec;
52 
53   uint64_t val;
54   uint64_t alignment = 1;
55 
56   // The original st_type if the expression represents a symbol. Any operation
57   // resets type to STT_NOTYPE.
58   uint8_t type = llvm::ELF::STT_NOTYPE;
59 
60   // True if this expression is enclosed in ABSOLUTE().
61   // This flag affects the return value of getValue().
62   bool forceAbsolute;
63 
64   // Original source location. Used for error messages.
65   std::string loc;
66 };
67 
// This represents an expression in the linker script.
// ScriptParser::readExpr reads an expression and returns an Expr.
// Later, we evaluate the expression by calling the function: it takes no
// arguments and returns an ExprValue. Being a std::function, an Expr can also
// be empty (e.g. when initialized from nullptr).
using Expr = std::function<ExprValue()>;
72 
// Tags for the different commands that can appear in a linker script
// SECTIONS command. The tag is stored in SectionCommand::kind and tested by
// the classof() helpers of the derived command types.
// https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS
enum SectionsCommandKind {
  // . = expr or <sym> = expr
  AssignmentKind,
  // An output section command.
  OutputSectionKind,
  // An input section description.
  InputSectionKind,
  // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr)
  ByteKind,
};
81 
// Common base of the SECTIONS command types. `kind` holds the tag that
// identifies the concrete derived type (the derived types pass a
// SectionsCommandKind value to this constructor).
struct SectionCommand {
  int kind;

  SectionCommand(int k) : kind{k} {}
};
86 
87 // This represents ". = <expr>" or "<symbol> = <expr>".
88 struct SymbolAssignment : SectionCommand {
89   SymbolAssignment(StringRef name, Expr e, std::string loc)
90       : SectionCommand(AssignmentKind), name(name), expression(e),
91         location(loc) {}
92 
93   static bool classof(const SectionCommand *c) {
94     return c->kind == AssignmentKind;
95   }
96 
97   // The LHS of an expression. Name is either a symbol name or ".".
98   StringRef name;
99   Defined *sym = nullptr;
100 
101   // The RHS of an expression.
102   Expr expression;
103 
104   // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN.
105   bool provide = false;
106   bool hidden = false;
107 
108   // Holds file name and line number for error reporting.
109   std::string location;
110 
111   // A string representation of this command. We use this for -Map.
112   std::string commandString;
113 
114   // Address of this assignment command.
115   uint64_t addr;
116 
117   // Size of this assignment command. This is usually 0, but if
118   // you move '.' this may be greater than 0.
119   uint64_t size;
120 };
121 
// Extra constraints a linker script may place on an output section.
// An output section marked ONLY_IF_RO is created only if its input sections
// are read-only; likewise, one marked ONLY_IF_RW is created if all input
// sections are RW.
enum class ConstraintKind {
  NoConstraint,
  ReadOnly,
  ReadWrite,
};
127 
128 // This struct is used to represent the location and size of regions of
129 // target memory. Instances of the struct are created by parsing the
130 // MEMORY command.
131 struct MemoryRegion {
132   MemoryRegion(StringRef name, Expr origin, Expr length, uint32_t flags,
133                uint32_t invFlags, uint32_t negFlags, uint32_t negInvFlags)
134       : name(std::string(name)), origin(origin), length(length), flags(flags),
135         invFlags(invFlags), negFlags(negFlags), negInvFlags(negInvFlags) {}
136 
137   std::string name;
138   Expr origin;
139   Expr length;
140   // A section can be assigned to the region if any of these ELF section flags
141   // are set...
142   uint32_t flags;
143   // ... or any of these flags are not set.
144   // For example, the memory region attribute "r" maps to SHF_WRITE.
145   uint32_t invFlags;
146   // A section cannot be assigned to the region if any of these ELF section
147   // flags are set...
148   uint32_t negFlags;
149   // ... or any of these flags are not set.
150   // For example, the memory region attribute "!r" maps to SHF_WRITE.
151   uint32_t negInvFlags;
152   uint64_t curPos = 0;
153 
154   uint64_t getOrigin() const { return origin().getValue(); }
155   uint64_t getLength() const { return length().getValue(); }
156 
157   bool compatibleWith(uint32_t secFlags) const {
158     if ((secFlags & negFlags) || (~secFlags & negInvFlags))
159       return false;
160     return (secFlags & flags) || (~secFlags & invFlags);
161   }
162 };
163 
164 // This struct represents one section match pattern in SECTIONS() command.
165 // It can optionally have negative match pattern for EXCLUDED_FILE command.
166 // Also it may be surrounded with SORT() command, so contains sorting rules.
167 class SectionPattern {
168   StringMatcher excludedFilePat;
169 
170   // Cache of the most recent input argument and result of excludesFile().
171   mutable std::optional<std::pair<const InputFile *, bool>> excludesFileCache;
172 
173 public:
174   SectionPattern(StringMatcher &&pat1, StringMatcher &&pat2)
175       : excludedFilePat(pat1), sectionPat(pat2),
176         sortOuter(SortSectionPolicy::Default),
177         sortInner(SortSectionPolicy::Default) {}
178 
179   bool excludesFile(const InputFile *file) const;
180 
181   StringMatcher sectionPat;
182   SortSectionPolicy sortOuter;
183   SortSectionPolicy sortInner;
184 };
185 
186 class InputSectionDescription : public SectionCommand {
187   SingleStringMatcher filePat;
188 
189   // Cache of the most recent input argument and result of matchesFile().
190   mutable std::optional<std::pair<const InputFile *, bool>> matchesFileCache;
191 
192 public:
193   InputSectionDescription(StringRef filePattern, uint64_t withFlags = 0,
194                           uint64_t withoutFlags = 0)
195       : SectionCommand(InputSectionKind), filePat(filePattern),
196         withFlags(withFlags), withoutFlags(withoutFlags) {}
197 
198   static bool classof(const SectionCommand *c) {
199     return c->kind == InputSectionKind;
200   }
201 
202   bool matchesFile(const InputFile *file) const;
203 
204   // Input sections that matches at least one of SectionPatterns
205   // will be associated with this InputSectionDescription.
206   SmallVector<SectionPattern, 0> sectionPatterns;
207 
208   // Includes InputSections and MergeInputSections. Used temporarily during
209   // assignment of input sections to output sections.
210   SmallVector<InputSectionBase *, 0> sectionBases;
211 
212   // Used after the finalizeInputSections() pass. MergeInputSections have been
213   // merged into MergeSyntheticSections.
214   SmallVector<InputSection *, 0> sections;
215 
216   // Temporary record of synthetic ThunkSection instances and the pass that
217   // they were created in. This is used to insert newly created ThunkSections
218   // into Sections at the end of a createThunks() pass.
219   SmallVector<std::pair<ThunkSection *, uint32_t>, 0> thunkSections;
220 
221   // SectionPatterns can be filtered with the INPUT_SECTION_FLAGS command.
222   uint64_t withFlags;
223   uint64_t withoutFlags;
224 };
225 
226 // Represents BYTE(), SHORT(), LONG(), or QUAD().
227 struct ByteCommand : SectionCommand {
228   ByteCommand(Expr e, unsigned size, std::string commandString)
229       : SectionCommand(ByteKind), commandString(commandString), expression(e),
230         size(size) {}
231 
232   static bool classof(const SectionCommand *c) { return c->kind == ByteKind; }
233 
234   // Keeps string representing the command. Used for -Map" is perhaps better.
235   std::string commandString;
236 
237   Expr expression;
238 
239   // This is just an offset of this assignment command in the output section.
240   unsigned offset;
241 
242   // Size of this data command.
243   unsigned size;
244 };
245 
246 struct InsertCommand {
247   SmallVector<StringRef, 0> names;
248   bool isAfter;
249   StringRef where;
250 };
251 
252 struct PhdrsCommand {
253   StringRef name;
254   unsigned type = llvm::ELF::PT_NULL;
255   bool hasFilehdr = false;
256   bool hasPhdrs = false;
257   std::optional<unsigned> flags;
258   Expr lmaExpr = nullptr;
259 };
260 
261 class LinkerScript final {
262   // Temporary state used in processSectionCommands() and assignAddresses()
263   // that must be reinitialized for each call to the above functions, and must
264   // not be used outside of the scope of a call to the above functions.
265   struct AddressState {
266     AddressState();
267     OutputSection *outSec = nullptr;
268     MemoryRegion *memRegion = nullptr;
269     MemoryRegion *lmaRegion = nullptr;
270     uint64_t lmaOffset = 0;
271     uint64_t tbssAddr = 0;
272   };
273 
274   llvm::DenseMap<llvm::CachedHashStringRef, OutputDesc *> nameToOutputSection;
275 
276   void addSymbol(SymbolAssignment *cmd);
277   void assignSymbol(SymbolAssignment *cmd, bool inSec);
278   void setDot(Expr e, const Twine &loc, bool inSec);
279   void expandOutputSection(uint64_t size);
280   void expandMemoryRegions(uint64_t size);
281 
282   SmallVector<InputSectionBase *, 0>
283   computeInputSections(const InputSectionDescription *,
284                        ArrayRef<InputSectionBase *>);
285 
286   SmallVector<InputSectionBase *, 0> createInputSectionList(OutputSection &cmd);
287 
288   void discardSynthetic(OutputSection &);
289 
290   SmallVector<size_t, 0> getPhdrIndices(OutputSection *sec);
291 
292   std::pair<MemoryRegion *, MemoryRegion *>
293   findMemoryRegion(OutputSection *sec, MemoryRegion *hint);
294 
295   void assignOffsets(OutputSection *sec);
296 
297   // This captures the local AddressState and makes it accessible
298   // deliberately. This is needed as there are some cases where we cannot just
299   // thread the current state through to a lambda function created by the
300   // script parser.
301   // This should remain a plain pointer as its lifetime is smaller than
302   // LinkerScript.
303   AddressState *state = nullptr;
304 
305   OutputSection *aether;
306 
307   uint64_t dot;
308 
309 public:
310   OutputDesc *createOutputSection(StringRef name, StringRef location);
311   OutputDesc *getOrCreateOutputSection(StringRef name);
312 
313   bool hasPhdrsCommands() { return !phdrsCommands.empty(); }
314   uint64_t getDot() { return dot; }
315   void discard(InputSectionBase &s);
316 
317   ExprValue getSymbolValue(StringRef name, const Twine &loc);
318 
319   void addOrphanSections();
320   void diagnoseOrphanHandling() const;
321   void diagnoseMissingSGSectionAddress() const;
322   void adjustOutputSections();
323   void adjustSectionsAfterSorting();
324 
325   SmallVector<PhdrEntry *, 0> createPhdrs();
326   bool needsInterpSection();
327 
328   bool shouldKeep(InputSectionBase *s);
329   const Defined *assignAddresses();
330   void allocateHeaders(SmallVector<PhdrEntry *, 0> &phdrs);
331   void processSectionCommands();
332   void processSymbolAssignments();
333   void declareSymbols();
334 
335   bool isDiscarded(const OutputSection *sec) const;
336 
337   // Used to handle INSERT AFTER statements.
338   void processInsertCommands();
339 
340   // Describe memory region usage.
341   void printMemoryUsage(raw_ostream &os);
342 
343   // Verify memory/lma overflows.
344   void checkMemoryRegions() const;
345 
346   // SECTIONS command list.
347   SmallVector<SectionCommand *, 0> sectionCommands;
348 
349   // PHDRS command list.
350   SmallVector<PhdrsCommand, 0> phdrsCommands;
351 
352   bool hasSectionsCommand = false;
353   bool errorOnMissingSection = false;
354 
355   // List of section patterns specified with KEEP commands. They will
356   // be kept even if they are unused and --gc-sections is specified.
357   SmallVector<InputSectionDescription *, 0> keptSections;
358 
359   // A map from memory region name to a memory region descriptor.
360   llvm::MapVector<llvm::StringRef, MemoryRegion *> memoryRegions;
361 
362   // A list of symbols referenced by the script.
363   SmallVector<llvm::StringRef, 0> referencedSymbols;
364 
365   // Used to implement INSERT [AFTER|BEFORE]. Contains output sections that need
366   // to be reordered.
367   SmallVector<InsertCommand, 0> insertCommands;
368 
369   // OutputSections specified by OVERWRITE_SECTIONS.
370   SmallVector<OutputDesc *, 0> overwriteSections;
371 
372   // Sections that will be warned/errored by --orphan-handling.
373   SmallVector<const InputSectionBase *, 0> orphanSections;
374 };
375 
// The global LinkerScript instance, held through a std::unique_ptr. This is
// only a declaration (extern); the definition is not in this header.
LLVM_LIBRARY_VISIBILITY extern std::unique_ptr<LinkerScript> script;
377 
378 } // end namespace lld::elf
379 
380 #endif // LLD_ELF_LINKER_SCRIPT_H
381