1 //===- LinkerScript.h -------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_ELF_LINKER_SCRIPT_H
10 #define LLD_ELF_LINKER_SCRIPT_H
11 
#include "Config.h"
#include "Writer.h"
#include "lld/Common/LLVM.h"
#include "lld/Common/Strings.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Compiler.h"
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <optional>
#include <string>
#include <utility>
25 
26 namespace lld::elf {
27 
// Forward declarations. Within the visible part of this header these types
// are only referred to through pointers, so their full definitions are not
// required here.
class Defined;
class InputFile;
class InputSection;
class InputSectionBase;
class OutputSection;
class SectionBase;
class ThunkSection;
struct OutputDesc;
36 
37 // This represents an r-value in the linker script.
38 struct ExprValue {
39   ExprValue(SectionBase *sec, bool forceAbsolute, uint64_t val,
40             const Twine &loc)
41       : sec(sec), val(val), forceAbsolute(forceAbsolute), loc(loc.str()) {}
42 
43   ExprValue(uint64_t val) : ExprValue(nullptr, false, val, "") {}
44 
45   bool isAbsolute() const { return forceAbsolute || sec == nullptr; }
46   uint64_t getValue() const;
47   uint64_t getSecAddr() const;
48   uint64_t getSectionOffset() const;
49 
50   // If a value is relative to a section, it has a non-null Sec.
51   SectionBase *sec;
52 
53   uint64_t val;
54   uint64_t alignment = 1;
55 
56   // The original st_type if the expression represents a symbol. Any operation
57   // resets type to STT_NOTYPE.
58   uint8_t type = llvm::ELF::STT_NOTYPE;
59 
60   // True if this expression is enclosed in ABSOLUTE().
61   // This flag affects the return value of getValue().
62   bool forceAbsolute;
63 
64   // Original source location. Used for error messages.
65   std::string loc;
66 };
67 
// This represents an expression in the linker script.
// ScriptParser::readExpr reads an expression and returns an Expr.
// Later, we evaluate the expression by calling the function.
// An Expr takes no arguments and yields an ExprValue on each invocation.
using Expr = std::function<ExprValue()>;
72 
// Discriminator values used to implement the linker script SECTIONS command.
// https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS
enum SectionsCommandKind {
  // `. = expr` or `<sym> = expr`.
  AssignmentKind,
  OutputSectionKind,
  InputSectionKind,
  // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr).
  ByteKind,
};
81 
// Common base of the SECTIONS command types. `kind` holds the discriminator
// that the classof() helpers of derived types compare against.
struct SectionCommand {
  int kind;

  SectionCommand(int k) : kind{k} {}
};
86 
87 // This represents ". = <expr>" or "<symbol> = <expr>".
88 struct SymbolAssignment : SectionCommand {
89   SymbolAssignment(StringRef name, Expr e, unsigned symOrder, std::string loc)
90       : SectionCommand(AssignmentKind), name(name), expression(e),
91         symOrder(symOrder), location(loc) {}
92 
93   static bool classof(const SectionCommand *c) {
94     return c->kind == AssignmentKind;
95   }
96 
97   // The LHS of an expression. Name is either a symbol name or ".".
98   StringRef name;
99   Defined *sym = nullptr;
100 
101   // The RHS of an expression.
102   Expr expression;
103 
104   // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN.
105   bool provide = false;
106   bool hidden = false;
107 
108   // This assignment references DATA_SEGMENT_RELRO_END.
109   bool dataSegmentRelroEnd = false;
110 
111   unsigned symOrder;
112 
113   // Holds file name and line number for error reporting.
114   std::string location;
115 
116   // A string representation of this command. We use this for -Map.
117   std::string commandString;
118 
119   // Address of this assignment command.
120   uint64_t addr;
121 
122   // Size of this assignment command. This is usually 0, but if
123   // you move '.' this may be greater than 0.
124   uint64_t size;
125 };
126 
// Output sections in a linker script may carry an extra constraint.
// An output section marked ONLY_IF_RO is created only if its input sections
// are read-only; likewise, one marked ONLY_IF_RW is created if all input
// sections are RW.
enum class ConstraintKind {
  NoConstraint,
  ReadOnly,
  ReadWrite,
};
132 
133 // This struct is used to represent the location and size of regions of
134 // target memory. Instances of the struct are created by parsing the
135 // MEMORY command.
136 struct MemoryRegion {
137   MemoryRegion(StringRef name, Expr origin, Expr length, uint32_t flags,
138                uint32_t invFlags, uint32_t negFlags, uint32_t negInvFlags)
139       : name(std::string(name)), origin(origin), length(length), flags(flags),
140         invFlags(invFlags), negFlags(negFlags), negInvFlags(negInvFlags) {}
141 
142   std::string name;
143   Expr origin;
144   Expr length;
145   // A section can be assigned to the region if any of these ELF section flags
146   // are set...
147   uint32_t flags;
148   // ... or any of these flags are not set.
149   // For example, the memory region attribute "r" maps to SHF_WRITE.
150   uint32_t invFlags;
151   // A section cannot be assigned to the region if any of these ELF section
152   // flags are set...
153   uint32_t negFlags;
154   // ... or any of these flags are not set.
155   // For example, the memory region attribute "!r" maps to SHF_WRITE.
156   uint32_t negInvFlags;
157   uint64_t curPos = 0;
158 
159   uint64_t getOrigin() const { return origin().getValue(); }
160   uint64_t getLength() const { return length().getValue(); }
161 
162   bool compatibleWith(uint32_t secFlags) const {
163     if ((secFlags & negFlags) || (~secFlags & negInvFlags))
164       return false;
165     return (secFlags & flags) || (~secFlags & invFlags);
166   }
167 };
168 
169 // This struct represents one section match pattern in SECTIONS() command.
170 // It can optionally have negative match pattern for EXCLUDED_FILE command.
171 // Also it may be surrounded with SORT() command, so contains sorting rules.
172 class SectionPattern {
173   StringMatcher excludedFilePat;
174 
175   // Cache of the most recent input argument and result of excludesFile().
176   mutable std::optional<std::pair<const InputFile *, bool>> excludesFileCache;
177 
178 public:
179   SectionPattern(StringMatcher &&pat1, StringMatcher &&pat2)
180       : excludedFilePat(pat1), sectionPat(pat2),
181         sortOuter(SortSectionPolicy::Default),
182         sortInner(SortSectionPolicy::Default) {}
183 
184   bool excludesFile(const InputFile *file) const;
185 
186   StringMatcher sectionPat;
187   SortSectionPolicy sortOuter;
188   SortSectionPolicy sortInner;
189 };
190 
191 class InputSectionDescription : public SectionCommand {
192   SingleStringMatcher filePat;
193 
194   // Cache of the most recent input argument and result of matchesFile().
195   mutable std::optional<std::pair<const InputFile *, bool>> matchesFileCache;
196 
197 public:
198   InputSectionDescription(StringRef filePattern, uint64_t withFlags = 0,
199                           uint64_t withoutFlags = 0)
200       : SectionCommand(InputSectionKind), filePat(filePattern),
201         withFlags(withFlags), withoutFlags(withoutFlags) {}
202 
203   static bool classof(const SectionCommand *c) {
204     return c->kind == InputSectionKind;
205   }
206 
207   bool matchesFile(const InputFile *file) const;
208 
209   // Input sections that matches at least one of SectionPatterns
210   // will be associated with this InputSectionDescription.
211   SmallVector<SectionPattern, 0> sectionPatterns;
212 
213   // Includes InputSections and MergeInputSections. Used temporarily during
214   // assignment of input sections to output sections.
215   SmallVector<InputSectionBase *, 0> sectionBases;
216 
217   // Used after the finalizeInputSections() pass. MergeInputSections have been
218   // merged into MergeSyntheticSections.
219   SmallVector<InputSection *, 0> sections;
220 
221   // Temporary record of synthetic ThunkSection instances and the pass that
222   // they were created in. This is used to insert newly created ThunkSections
223   // into Sections at the end of a createThunks() pass.
224   SmallVector<std::pair<ThunkSection *, uint32_t>, 0> thunkSections;
225 
226   // SectionPatterns can be filtered with the INPUT_SECTION_FLAGS command.
227   uint64_t withFlags;
228   uint64_t withoutFlags;
229 };
230 
231 // Represents BYTE(), SHORT(), LONG(), or QUAD().
232 struct ByteCommand : SectionCommand {
233   ByteCommand(Expr e, unsigned size, std::string commandString)
234       : SectionCommand(ByteKind), commandString(commandString), expression(e),
235         size(size) {}
236 
237   static bool classof(const SectionCommand *c) { return c->kind == ByteKind; }
238 
239   // Keeps string representing the command. Used for -Map" is perhaps better.
240   std::string commandString;
241 
242   Expr expression;
243 
244   // This is just an offset of this assignment command in the output section.
245   unsigned offset;
246 
247   // Size of this data command.
248   unsigned size;
249 };
250 
251 struct InsertCommand {
252   SmallVector<StringRef, 0> names;
253   bool isAfter;
254   StringRef where;
255 };
256 
// One entry of the PHDRS command list kept by the linker script.
struct PhdrsCommand {
  StringRef name;
  // Defaults to PT_NULL.
  unsigned type = llvm::ELF::PT_NULL;
  // NOTE(review): presumably set by the FILEHDR and PHDRS keywords of a
  // PHDRS entry -- confirm against the parser.
  bool hasFilehdr = false;
  bool hasPhdrs = false;
  // Empty when no flags value has been recorded for this entry.
  std::optional<unsigned> flags;
  // Empty (null std::function) by default. NOTE(review): the name suggests
  // this is the entry's load-address expression -- confirm against the parser.
  Expr lmaExpr = nullptr;
};
265 
266 class LinkerScript final {
267   // Temporary state used in processSectionCommands() and assignAddresses()
268   // that must be reinitialized for each call to the above functions, and must
269   // not be used outside of the scope of a call to the above functions.
270   struct AddressState {
271     AddressState();
272     OutputSection *outSec = nullptr;
273     MemoryRegion *memRegion = nullptr;
274     MemoryRegion *lmaRegion = nullptr;
275     uint64_t lmaOffset = 0;
276     uint64_t tbssAddr = 0;
277   };
278 
279   llvm::DenseMap<llvm::CachedHashStringRef, OutputDesc *> nameToOutputSection;
280 
281   void addSymbol(SymbolAssignment *cmd);
282   void assignSymbol(SymbolAssignment *cmd, bool inSec);
283   void setDot(Expr e, const Twine &loc, bool inSec);
284   void expandOutputSection(uint64_t size);
285   void expandMemoryRegions(uint64_t size);
286 
287   SmallVector<InputSectionBase *, 0>
288   computeInputSections(const InputSectionDescription *,
289                        ArrayRef<InputSectionBase *>);
290 
291   SmallVector<InputSectionBase *, 0> createInputSectionList(OutputSection &cmd);
292 
293   void discardSynthetic(OutputSection &);
294 
295   SmallVector<size_t, 0> getPhdrIndices(OutputSection *sec);
296 
297   std::pair<MemoryRegion *, MemoryRegion *>
298   findMemoryRegion(OutputSection *sec, MemoryRegion *hint);
299 
300   void assignOffsets(OutputSection *sec);
301 
302   // This captures the local AddressState and makes it accessible
303   // deliberately. This is needed as there are some cases where we cannot just
304   // thread the current state through to a lambda function created by the
305   // script parser.
306   // This should remain a plain pointer as its lifetime is smaller than
307   // LinkerScript.
308   AddressState *state = nullptr;
309 
310   OutputSection *aether;
311 
312   uint64_t dot;
313 
314 public:
315   OutputDesc *createOutputSection(StringRef name, StringRef location);
316   OutputDesc *getOrCreateOutputSection(StringRef name);
317 
318   bool hasPhdrsCommands() { return !phdrsCommands.empty(); }
319   uint64_t getDot() { return dot; }
320   void discard(InputSectionBase &s);
321 
322   ExprValue getSymbolValue(StringRef name, const Twine &loc);
323 
324   void addOrphanSections();
325   void diagnoseOrphanHandling() const;
326   void diagnoseMissingSGSectionAddress() const;
327   void adjustOutputSections();
328   void adjustSectionsAfterSorting();
329 
330   SmallVector<PhdrEntry *, 0> createPhdrs();
331   bool needsInterpSection();
332 
333   bool shouldKeep(InputSectionBase *s);
334   const Defined *assignAddresses();
335   void allocateHeaders(SmallVector<PhdrEntry *, 0> &phdrs);
336   void processSectionCommands();
337   void processSymbolAssignments();
338   void declareSymbols();
339 
340   bool isDiscarded(const OutputSection *sec) const;
341 
342   // Used to handle INSERT AFTER statements.
343   void processInsertCommands();
344 
345   // Describe memory region usage.
346   void printMemoryUsage(raw_ostream &os);
347 
348   // Check backward location counter assignment and memory region/LMA overflows.
349   void checkFinalScriptConditions() const;
350 
351   // SECTIONS command list.
352   SmallVector<SectionCommand *, 0> sectionCommands;
353 
354   // PHDRS command list.
355   SmallVector<PhdrsCommand, 0> phdrsCommands;
356 
357   bool hasSectionsCommand = false;
358   bool seenDataAlign = false;
359   bool seenRelroEnd = false;
360   bool errorOnMissingSection = false;
361   std::string backwardDotErr;
362 
363   // List of section patterns specified with KEEP commands. They will
364   // be kept even if they are unused and --gc-sections is specified.
365   SmallVector<InputSectionDescription *, 0> keptSections;
366 
367   // A map from memory region name to a memory region descriptor.
368   llvm::MapVector<llvm::StringRef, MemoryRegion *> memoryRegions;
369 
370   // A list of symbols referenced by the script.
371   SmallVector<llvm::StringRef, 0> referencedSymbols;
372 
373   // Used to implement INSERT [AFTER|BEFORE]. Contains output sections that need
374   // to be reordered.
375   SmallVector<InsertCommand, 0> insertCommands;
376 
377   // OutputSections specified by OVERWRITE_SECTIONS.
378   SmallVector<OutputDesc *, 0> overwriteSections;
379 
380   // Sections that will be warned/errored by --orphan-handling.
381   SmallVector<const InputSectionBase *, 0> orphanSections;
382 };
383 
// Declaration of the global `script` object, a unique_ptr that owns a
// LinkerScript. It is only declared here (`extern`); the definition is not
// part of this header.
LLVM_LIBRARY_VISIBILITY extern std::unique_ptr<LinkerScript> script;
385 
386 } // end namespace lld::elf
387 
388 #endif // LLD_ELF_LINKER_SCRIPT_H
389