1 //===- LinkerScript.h -------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_ELF_LINKER_SCRIPT_H
10 #define LLD_ELF_LINKER_SCRIPT_H
11 
#include "Config.h"
#include "Writer.h"
#include "lld/Common/LLVM.h"
#include "lld/Common/Strings.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MemoryBuffer.h"
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <utility>
#include <vector>
27 
28 namespace lld {
29 namespace elf {
30 
31 class Defined;
32 class InputFile;
33 class InputSection;
34 class InputSectionBase;
35 class OutputSection;
36 class SectionBase;
37 class Symbol;
38 class ThunkSection;
39 
40 // This represents an r-value in the linker script.
41 struct ExprValue {
ExprValueExprValue42   ExprValue(SectionBase *sec, bool forceAbsolute, uint64_t val,
43             const Twine &loc)
44       : sec(sec), forceAbsolute(forceAbsolute), val(val), loc(loc.str()) {}
45 
ExprValueExprValue46   ExprValue(uint64_t val) : ExprValue(nullptr, false, val, "") {}
47 
isAbsoluteExprValue48   bool isAbsolute() const { return forceAbsolute || sec == nullptr; }
49   uint64_t getValue() const;
50   uint64_t getSecAddr() const;
51   uint64_t getSectionOffset() const;
52 
53   // If a value is relative to a section, it has a non-null Sec.
54   SectionBase *sec;
55 
56   // True if this expression is enclosed in ABSOLUTE().
57   // This flag affects the return value of getValue().
58   bool forceAbsolute;
59 
60   uint64_t val;
61   uint64_t alignment = 1;
62 
63   // The original st_type if the expression represents a symbol. Any operation
64   // resets type to STT_NOTYPE.
65   uint8_t type = llvm::ELF::STT_NOTYPE;
66 
67   // Original source location. Used for error messages.
68   std::string loc;
69 };
70 
// This represents an expression in the linker script.
// ScriptParser::readExpr reads an expression and returns an Expr.
// Later, we evaluate the expression by calling the function, which takes no
// arguments and returns the resulting ExprValue.
using Expr = std::function<ExprValue()>;
75 
// Tags for the kinds of command that can appear inside a linker script
// SECTIONS command. The classof() functions of the command classes compare
// BaseCommand::kind against these values.
// https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS
enum SectionsCommandKind {
  // ". = expr" or "<sym> = expr".
  AssignmentKind = 0,
  OutputSectionKind = 1,
  InputSectionKind = 2,
  // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr).
  ByteKind = 3
};
84 
// Common base of the SECTIONS commands. It stores only the tag that the
// classof() functions of the derived command classes test.
struct BaseCommand {
  BaseCommand(int k) : kind{k} {}

  // Tag identifying the concrete command; the derived classes in this file
  // pass a SectionsCommandKind enumerator.
  int kind;
};
89 
90 // This represents ". = <expr>" or "<symbol> = <expr>".
91 struct SymbolAssignment : BaseCommand {
SymbolAssignmentSymbolAssignment92   SymbolAssignment(StringRef name, Expr e, std::string loc)
93       : BaseCommand(AssignmentKind), name(name), expression(e), location(loc) {}
94 
classofSymbolAssignment95   static bool classof(const BaseCommand *c) {
96     return c->kind == AssignmentKind;
97   }
98 
99   // The LHS of an expression. Name is either a symbol name or ".".
100   StringRef name;
101   Defined *sym = nullptr;
102 
103   // The RHS of an expression.
104   Expr expression;
105 
106   // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN.
107   bool provide = false;
108   bool hidden = false;
109 
110   // Holds file name and line number for error reporting.
111   std::string location;
112 
113   // A string representation of this command. We use this for -Map.
114   std::string commandString;
115 
116   // Address of this assignment command.
117   uint64_t addr;
118 
119   // Size of this assignment command. This is usually 0, but if
120   // you move '.' this may be greater than 0.
121   uint64_t size;
122 };
123 
// Linker scripts can attach an extra constraint to an output section.
// An output section marked ONLY_IF_RO is created only if its input sections
// are read-only. Likewise, one marked ONLY_IF_RW is created only if all of
// its input sections are RW.
enum class ConstraintKind {
  NoConstraint, // No constraint was given.
  ReadOnly,     // ONLY_IF_RO
  ReadWrite,    // ONLY_IF_RW
};
129 
130 // This struct is used to represent the location and size of regions of
131 // target memory. Instances of the struct are created by parsing the
132 // MEMORY command.
133 struct MemoryRegion {
MemoryRegionMemoryRegion134   MemoryRegion(StringRef name, Expr origin, Expr length, uint32_t flags,
135                uint32_t negFlags)
136       : name(std::string(name)), origin(origin), length(length), flags(flags),
137         negFlags(negFlags) {}
138 
139   std::string name;
140   Expr origin;
141   Expr length;
142   uint32_t flags;
143   uint32_t negFlags;
144   uint64_t curPos = 0;
145 };
146 
147 // This struct represents one section match pattern in SECTIONS() command.
148 // It can optionally have negative match pattern for EXCLUDED_FILE command.
149 // Also it may be surrounded with SORT() command, so contains sorting rules.
150 class SectionPattern {
151   StringMatcher excludedFilePat;
152 
153   // Cache of the most recent input argument and result of excludesFile().
154   mutable llvm::Optional<std::pair<const InputFile *, bool>> excludesFileCache;
155 
156 public:
SectionPattern(StringMatcher && pat1,StringMatcher && pat2)157   SectionPattern(StringMatcher &&pat1, StringMatcher &&pat2)
158       : excludedFilePat(pat1), sectionPat(pat2),
159         sortOuter(SortSectionPolicy::Default),
160         sortInner(SortSectionPolicy::Default) {}
161 
162   bool excludesFile(const InputFile *file) const;
163 
164   StringMatcher sectionPat;
165   SortSectionPolicy sortOuter;
166   SortSectionPolicy sortInner;
167 };
168 
169 class InputSectionDescription : public BaseCommand {
170   SingleStringMatcher filePat;
171 
172   // Cache of the most recent input argument and result of matchesFile().
173   mutable llvm::Optional<std::pair<const InputFile *, bool>> matchesFileCache;
174 
175 public:
176   InputSectionDescription(StringRef filePattern, uint64_t withFlags = 0,
177                           uint64_t withoutFlags = 0)
BaseCommand(InputSectionKind)178       : BaseCommand(InputSectionKind), filePat(filePattern),
179         withFlags(withFlags), withoutFlags(withoutFlags) {}
180 
classof(const BaseCommand * c)181   static bool classof(const BaseCommand *c) {
182     return c->kind == InputSectionKind;
183   }
184 
185   bool matchesFile(const InputFile *file) const;
186 
187   // Input sections that matches at least one of SectionPatterns
188   // will be associated with this InputSectionDescription.
189   std::vector<SectionPattern> sectionPatterns;
190 
191   // Includes InputSections and MergeInputSections. Used temporarily during
192   // assignment of input sections to output sections.
193   std::vector<InputSectionBase *> sectionBases;
194 
195   // Used after the finalizeInputSections() pass. MergeInputSections have been
196   // merged into MergeSyntheticSections.
197   std::vector<InputSection *> sections;
198 
199   // Temporary record of synthetic ThunkSection instances and the pass that
200   // they were created in. This is used to insert newly created ThunkSections
201   // into Sections at the end of a createThunks() pass.
202   std::vector<std::pair<ThunkSection *, uint32_t>> thunkSections;
203 
204   // SectionPatterns can be filtered with the INPUT_SECTION_FLAGS command.
205   uint64_t withFlags;
206   uint64_t withoutFlags;
207 };
208 
209 // Represents BYTE(), SHORT(), LONG(), or QUAD().
210 struct ByteCommand : BaseCommand {
ByteCommandByteCommand211   ByteCommand(Expr e, unsigned size, std::string commandString)
212       : BaseCommand(ByteKind), commandString(commandString), expression(e),
213         size(size) {}
214 
classofByteCommand215   static bool classof(const BaseCommand *c) { return c->kind == ByteKind; }
216 
217   // Keeps string representing the command. Used for -Map" is perhaps better.
218   std::string commandString;
219 
220   Expr expression;
221 
222   // This is just an offset of this assignment command in the output section.
223   unsigned offset;
224 
225   // Size of this data command.
226   unsigned size;
227 };
228 
229 struct InsertCommand {
230   std::vector<StringRef> names;
231   bool isAfter;
232   StringRef where;
233 };
234 
235 struct PhdrsCommand {
236   StringRef name;
237   unsigned type = llvm::ELF::PT_NULL;
238   bool hasFilehdr = false;
239   bool hasPhdrs = false;
240   llvm::Optional<unsigned> flags;
241   Expr lmaExpr = nullptr;
242 };
243 
244 class LinkerScript final {
245   // Temporary state used in processSectionCommands() and assignAddresses()
246   // that must be reinitialized for each call to the above functions, and must
247   // not be used outside of the scope of a call to the above functions.
248   struct AddressState {
249     AddressState();
250     OutputSection *outSec = nullptr;
251     MemoryRegion *memRegion = nullptr;
252     MemoryRegion *lmaRegion = nullptr;
253     uint64_t lmaOffset = 0;
254     uint64_t tbssAddr = 0;
255   };
256 
257   llvm::DenseMap<StringRef, OutputSection *> nameToOutputSection;
258 
259   void addSymbol(SymbolAssignment *cmd);
260   void assignSymbol(SymbolAssignment *cmd, bool inSec);
261   void setDot(Expr e, const Twine &loc, bool inSec);
262   void expandOutputSection(uint64_t size);
263   void expandMemoryRegions(uint64_t size);
264 
265   std::vector<InputSectionBase *>
266   computeInputSections(const InputSectionDescription *,
267                        ArrayRef<InputSectionBase *>);
268 
269   std::vector<InputSectionBase *> createInputSectionList(OutputSection &cmd);
270 
271   void discardSynthetic(OutputSection &);
272 
273   std::vector<size_t> getPhdrIndices(OutputSection *sec);
274 
275   MemoryRegion *findMemoryRegion(OutputSection *sec);
276 
277   void switchTo(OutputSection *sec);
278   uint64_t advance(uint64_t size, unsigned align);
279   void output(InputSection *sec);
280 
281   void assignOffsets(OutputSection *sec);
282 
283   // Ctx captures the local AddressState and makes it accessible
284   // deliberately. This is needed as there are some cases where we cannot just
285   // thread the current state through to a lambda function created by the
286   // script parser.
287   // This should remain a plain pointer as its lifetime is smaller than
288   // LinkerScript.
289   AddressState *ctx = nullptr;
290 
291   OutputSection *aether;
292 
293   uint64_t dot;
294 
295 public:
296   OutputSection *createOutputSection(StringRef name, StringRef location);
297   OutputSection *getOrCreateOutputSection(StringRef name);
298 
hasPhdrsCommands()299   bool hasPhdrsCommands() { return !phdrsCommands.empty(); }
getDot()300   uint64_t getDot() { return dot; }
301   void discard(InputSectionBase *s);
302 
303   ExprValue getSymbolValue(StringRef name, const Twine &loc);
304 
305   void addOrphanSections();
306   void diagnoseOrphanHandling() const;
307   void adjustSectionsBeforeSorting();
308   void adjustSectionsAfterSorting();
309 
310   std::vector<PhdrEntry *> createPhdrs();
311   bool needsInterpSection();
312 
313   bool shouldKeep(InputSectionBase *s);
314   const Defined *assignAddresses();
315   void allocateHeaders(std::vector<PhdrEntry *> &phdrs);
316   void processSectionCommands();
317   void processSymbolAssignments();
318   void declareSymbols();
319 
320   // Used to handle INSERT AFTER statements.
321   void processInsertCommands();
322 
323   // SECTIONS command list.
324   std::vector<BaseCommand *> sectionCommands;
325 
326   // PHDRS command list.
327   std::vector<PhdrsCommand> phdrsCommands;
328 
329   bool hasSectionsCommand = false;
330   bool errorOnMissingSection = false;
331 
332   // List of section patterns specified with KEEP commands. They will
333   // be kept even if they are unused and --gc-sections is specified.
334   std::vector<InputSectionDescription *> keptSections;
335 
336   // A map from memory region name to a memory region descriptor.
337   llvm::MapVector<llvm::StringRef, MemoryRegion *> memoryRegions;
338 
339   // A list of symbols referenced by the script.
340   std::vector<llvm::StringRef> referencedSymbols;
341 
342   // Used to implement INSERT [AFTER|BEFORE]. Contains output sections that need
343   // to be reordered.
344   std::vector<InsertCommand> insertCommands;
345 
346   // OutputSections specified by OVERWRITE_SECTIONS.
347   std::vector<OutputSection *> overwriteSections;
348 
349   // Sections that will be warned/errored by --orphan-handling.
350   std::vector<const InputSectionBase *> orphanSections;
351 };
352 
// Pointer to the LinkerScript instance shared across the ELF linker. This is
// only a declaration; the definition lives in another translation unit.
extern LinkerScript *script;
354 
355 } // end namespace elf
356 } // end namespace lld
357 
358 #endif // LLD_ELF_LINKER_SCRIPT_H
359