1 //===- LinkerScript.h -------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_ELF_LINKER_SCRIPT_H
10 #define LLD_ELF_LINKER_SCRIPT_H
11 
#include "Config.h"
#include "Writer.h"
#include "lld/Common/LLVM.h"
#include "lld/Common/Strings.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MemoryBuffer.h"
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <string>
#include <utility>
#include <vector>
27 
28 namespace lld {
29 namespace elf {
30 
// Forward declarations. The declarations in this header refer to these
// classes only through pointers and references, so their full definitions
// are not required here.
class Defined;
class InputFile;
class InputSection;
class InputSectionBase;
class OutputSection;
class SectionBase;
class Symbol;
class ThunkSection;
39 
40 // This represents an r-value in the linker script.
41 struct ExprValue {
42   ExprValue(SectionBase *sec, bool forceAbsolute, uint64_t val,
43             const Twine &loc)
44       : sec(sec), val(val), forceAbsolute(forceAbsolute), loc(loc.str()) {}
45 
46   ExprValue(uint64_t val) : ExprValue(nullptr, false, val, "") {}
47 
48   bool isAbsolute() const { return forceAbsolute || sec == nullptr; }
49   uint64_t getValue() const;
50   uint64_t getSecAddr() const;
51   uint64_t getSectionOffset() const;
52 
53   // If a value is relative to a section, it has a non-null Sec.
54   SectionBase *sec;
55 
56   uint64_t val;
57   uint64_t alignment = 1;
58 
59   // The original st_type if the expression represents a symbol. Any operation
60   // resets type to STT_NOTYPE.
61   uint8_t type = llvm::ELF::STT_NOTYPE;
62 
63   // True if this expression is enclosed in ABSOLUTE().
64   // This flag affects the return value of getValue().
65   bool forceAbsolute;
66 
67   // Original source location. Used for error messages.
68   std::string loc;
69 };
70 
// This represents an expression in the linker script.
// ScriptParser::readExpr reads an expression and returns an Expr.
// Later, we evaluate the expression by calling the function.
// An Expr takes no arguments and returns an ExprValue. It may also be empty:
// PhdrsCommand::lmaExpr, for instance, defaults to nullptr.
using Expr = std::function<ExprValue()>;
75 
// Discriminator for the kinds of commands handled when implementing the
// linker script SECTIONS command.
// https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS
enum SectionsCommandKind {
  // ". = expr" or "<sym> = expr".
  AssignmentKind,
  OutputSectionKind,
  InputSectionKind,
  // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr).
  ByteKind,
};
84 
// Common base of the command structs in this header. It only records which
// kind of command the object is; the classof() helpers of the derived types
// compare `kind` against a SectionsCommandKind value.
struct SectionCommand {
  int kind;

  SectionCommand(int k) : kind{k} {}
};
89 
90 // This represents ". = <expr>" or "<symbol> = <expr>".
91 struct SymbolAssignment : SectionCommand {
92   SymbolAssignment(StringRef name, Expr e, std::string loc)
93       : SectionCommand(AssignmentKind), name(name), expression(e),
94         location(loc) {}
95 
96   static bool classof(const SectionCommand *c) {
97     return c->kind == AssignmentKind;
98   }
99 
100   // The LHS of an expression. Name is either a symbol name or ".".
101   StringRef name;
102   Defined *sym = nullptr;
103 
104   // The RHS of an expression.
105   Expr expression;
106 
107   // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN.
108   bool provide = false;
109   bool hidden = false;
110 
111   // Holds file name and line number for error reporting.
112   std::string location;
113 
114   // A string representation of this command. We use this for -Map.
115   std::string commandString;
116 
117   // Address of this assignment command.
118   uint64_t addr;
119 
120   // Size of this assignment command. This is usually 0, but if
121   // you move '.' this may be greater than 0.
122   uint64_t size;
123 };
124 
// Linker scripts can attach an extra constraint to an output section.
// An output section marked ONLY_IF_RO is created only if its input sections
// are read-only; likewise, one marked ONLY_IF_RW is created if all of its
// input sections are RW.
enum class ConstraintKind {
  NoConstraint,
  ReadOnly,
  ReadWrite,
};
130 
131 // This struct is used to represent the location and size of regions of
132 // target memory. Instances of the struct are created by parsing the
133 // MEMORY command.
134 struct MemoryRegion {
135   MemoryRegion(StringRef name, Expr origin, Expr length, uint32_t flags,
136                uint32_t invFlags, uint32_t negFlags, uint32_t negInvFlags)
137       : name(std::string(name)), origin(origin), length(length), flags(flags),
138         invFlags(invFlags), negFlags(negFlags), negInvFlags(negInvFlags) {}
139 
140   std::string name;
141   Expr origin;
142   Expr length;
143   // A section can be assigned to the region if any of these ELF section flags
144   // are set...
145   uint32_t flags;
146   // ... or any of these flags are not set.
147   // For example, the memory region attribute "r" maps to SHF_WRITE.
148   uint32_t invFlags;
149   // A section cannot be assigned to the region if any of these ELF section
150   // flags are set...
151   uint32_t negFlags;
152   // ... or any of these flags are not set.
153   // For example, the memory region attribute "!r" maps to SHF_WRITE.
154   uint32_t negInvFlags;
155   uint64_t curPos = 0;
156 
157   bool compatibleWith(uint32_t secFlags) const {
158     if ((secFlags & negFlags) || (~secFlags & negInvFlags))
159       return false;
160     return (secFlags & flags) || (~secFlags & invFlags);
161   }
162 };
163 
164 // This struct represents one section match pattern in SECTIONS() command.
165 // It can optionally have negative match pattern for EXCLUDED_FILE command.
166 // Also it may be surrounded with SORT() command, so contains sorting rules.
167 class SectionPattern {
168   StringMatcher excludedFilePat;
169 
170   // Cache of the most recent input argument and result of excludesFile().
171   mutable llvm::Optional<std::pair<const InputFile *, bool>> excludesFileCache;
172 
173 public:
174   SectionPattern(StringMatcher &&pat1, StringMatcher &&pat2)
175       : excludedFilePat(pat1), sectionPat(pat2),
176         sortOuter(SortSectionPolicy::Default),
177         sortInner(SortSectionPolicy::Default) {}
178 
179   bool excludesFile(const InputFile *file) const;
180 
181   StringMatcher sectionPat;
182   SortSectionPolicy sortOuter;
183   SortSectionPolicy sortInner;
184 };
185 
186 class InputSectionDescription : public SectionCommand {
187   SingleStringMatcher filePat;
188 
189   // Cache of the most recent input argument and result of matchesFile().
190   mutable llvm::Optional<std::pair<const InputFile *, bool>> matchesFileCache;
191 
192 public:
193   InputSectionDescription(StringRef filePattern, uint64_t withFlags = 0,
194                           uint64_t withoutFlags = 0)
195       : SectionCommand(InputSectionKind), filePat(filePattern),
196         withFlags(withFlags), withoutFlags(withoutFlags) {}
197 
198   static bool classof(const SectionCommand *c) {
199     return c->kind == InputSectionKind;
200   }
201 
202   bool matchesFile(const InputFile *file) const;
203 
204   // Input sections that matches at least one of SectionPatterns
205   // will be associated with this InputSectionDescription.
206   SmallVector<SectionPattern, 0> sectionPatterns;
207 
208   // Includes InputSections and MergeInputSections. Used temporarily during
209   // assignment of input sections to output sections.
210   SmallVector<InputSectionBase *, 0> sectionBases;
211 
212   // Used after the finalizeInputSections() pass. MergeInputSections have been
213   // merged into MergeSyntheticSections.
214   SmallVector<InputSection *, 0> sections;
215 
216   // Temporary record of synthetic ThunkSection instances and the pass that
217   // they were created in. This is used to insert newly created ThunkSections
218   // into Sections at the end of a createThunks() pass.
219   SmallVector<std::pair<ThunkSection *, uint32_t>, 0> thunkSections;
220 
221   // SectionPatterns can be filtered with the INPUT_SECTION_FLAGS command.
222   uint64_t withFlags;
223   uint64_t withoutFlags;
224 };
225 
226 // Represents BYTE(), SHORT(), LONG(), or QUAD().
227 struct ByteCommand : SectionCommand {
228   ByteCommand(Expr e, unsigned size, std::string commandString)
229       : SectionCommand(ByteKind), commandString(commandString), expression(e),
230         size(size) {}
231 
232   static bool classof(const SectionCommand *c) { return c->kind == ByteKind; }
233 
234   // Keeps string representing the command. Used for -Map" is perhaps better.
235   std::string commandString;
236 
237   Expr expression;
238 
239   // This is just an offset of this assignment command in the output section.
240   unsigned offset;
241 
242   // Size of this data command.
243   unsigned size;
244 };
245 
// One entry of LinkerScript::insertCommands, which is used to implement
// INSERT [AFTER|BEFORE].
struct InsertCommand {
  // NOTE(review): presumably the names of the output sections to be
  // reordered; confirm against the parser.
  SmallVector<StringRef, 0> names;
  // NOTE(review): presumably true for INSERT AFTER and false for INSERT
  // BEFORE; confirm against the parser. No default value is given here.
  bool isAfter;
  // NOTE(review): presumably the name of the output section the insertion is
  // relative to; confirm against the parser.
  StringRef where;
};
251 
// One entry of LinkerScript::phdrsCommands, the PHDRS command list.
struct PhdrsCommand {
  StringRef name;
  // Defaults to PT_NULL.
  unsigned type = llvm::ELF::PT_NULL;
  // NOTE(review): these two flags presumably record the FILEHDR and PHDRS
  // keywords of a PHDRS entry; confirm against the parser.
  bool hasFilehdr = false;
  bool hasPhdrs = false;
  // Has no value unless one is explicitly stored.
  llvm::Optional<unsigned> flags;
  // Empty (nullptr) by default, so it must be checked before being called.
  Expr lmaExpr = nullptr;
};
260 
261 class LinkerScript final {
262   // Temporary state used in processSectionCommands() and assignAddresses()
263   // that must be reinitialized for each call to the above functions, and must
264   // not be used outside of the scope of a call to the above functions.
265   struct AddressState {
266     AddressState();
267     OutputSection *outSec = nullptr;
268     MemoryRegion *memRegion = nullptr;
269     MemoryRegion *lmaRegion = nullptr;
270     uint64_t lmaOffset = 0;
271     uint64_t tbssAddr = 0;
272   };
273 
274   llvm::DenseMap<llvm::CachedHashStringRef, OutputSection *>
275       nameToOutputSection;
276 
277   void addSymbol(SymbolAssignment *cmd);
278   void assignSymbol(SymbolAssignment *cmd, bool inSec);
279   void setDot(Expr e, const Twine &loc, bool inSec);
280   void expandOutputSection(uint64_t size);
281   void expandMemoryRegions(uint64_t size);
282 
283   SmallVector<InputSectionBase *, 0>
284   computeInputSections(const InputSectionDescription *,
285                        ArrayRef<InputSectionBase *>);
286 
287   SmallVector<InputSectionBase *, 0> createInputSectionList(OutputSection &cmd);
288 
289   void discardSynthetic(OutputSection &);
290 
291   SmallVector<size_t, 0> getPhdrIndices(OutputSection *sec);
292 
293   std::pair<MemoryRegion *, MemoryRegion *>
294   findMemoryRegion(OutputSection *sec, MemoryRegion *hint);
295 
296   void assignOffsets(OutputSection *sec);
297 
298   // Ctx captures the local AddressState and makes it accessible
299   // deliberately. This is needed as there are some cases where we cannot just
300   // thread the current state through to a lambda function created by the
301   // script parser.
302   // This should remain a plain pointer as its lifetime is smaller than
303   // LinkerScript.
304   AddressState *ctx = nullptr;
305 
306   OutputSection *aether;
307 
308   uint64_t dot;
309 
310 public:
311   OutputSection *createOutputSection(StringRef name, StringRef location);
312   OutputSection *getOrCreateOutputSection(StringRef name);
313 
314   bool hasPhdrsCommands() { return !phdrsCommands.empty(); }
315   uint64_t getDot() { return dot; }
316   void discard(InputSectionBase &s);
317 
318   ExprValue getSymbolValue(StringRef name, const Twine &loc);
319 
320   void addOrphanSections();
321   void diagnoseOrphanHandling() const;
322   void adjustOutputSections();
323   void adjustSectionsAfterSorting();
324 
325   SmallVector<PhdrEntry *, 0> createPhdrs();
326   bool needsInterpSection();
327 
328   bool shouldKeep(InputSectionBase *s);
329   const Defined *assignAddresses();
330   void allocateHeaders(SmallVector<PhdrEntry *, 0> &phdrs);
331   void processSectionCommands();
332   void processSymbolAssignments();
333   void declareSymbols();
334 
335   bool isDiscarded(const OutputSection *sec) const;
336 
337   // Used to handle INSERT AFTER statements.
338   void processInsertCommands();
339 
340   // SECTIONS command list.
341   SmallVector<SectionCommand *, 0> sectionCommands;
342 
343   // PHDRS command list.
344   SmallVector<PhdrsCommand, 0> phdrsCommands;
345 
346   bool hasSectionsCommand = false;
347   bool errorOnMissingSection = false;
348 
349   // List of section patterns specified with KEEP commands. They will
350   // be kept even if they are unused and --gc-sections is specified.
351   SmallVector<InputSectionDescription *, 0> keptSections;
352 
353   // A map from memory region name to a memory region descriptor.
354   llvm::MapVector<llvm::StringRef, MemoryRegion *> memoryRegions;
355 
356   // A list of symbols referenced by the script.
357   SmallVector<llvm::StringRef, 0> referencedSymbols;
358 
359   // Used to implement INSERT [AFTER|BEFORE]. Contains output sections that need
360   // to be reordered.
361   SmallVector<InsertCommand, 0> insertCommands;
362 
363   // OutputSections specified by OVERWRITE_SECTIONS.
364   SmallVector<OutputSection *, 0> overwriteSections;
365 
366   // Sections that will be warned/errored by --orphan-handling.
367   SmallVector<const InputSectionBase *, 0> orphanSections;
368 };
369 
// The LinkerScript instance shared across the linker. This is only a
// declaration; the definition lives in another translation unit. The pointer
// may be null until something assigns to it.
extern std::unique_ptr<LinkerScript> script;
371 
372 } // end namespace elf
373 } // end namespace lld
374 
375 #endif // LLD_ELF_LINKER_SCRIPT_H
376