1 // script.h -- handle linker scripts for gold   -*- C++ -*-
2 
3 // Copyright (C) 2006-2020 Free Software Foundation, Inc.
4 // Written by Ian Lance Taylor <iant@google.com>.
5 
6 // This file is part of gold.
7 
8 // This program is free software; you can redistribute it and/or modify
9 // it under the terms of the GNU General Public License as published by
10 // the Free Software Foundation; either version 3 of the License, or
11 // (at your option) any later version.
12 
13 // This program is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 // GNU General Public License for more details.
17 
18 // You should have received a copy of the GNU General Public License
19 // along with this program; if not, write to the Free Software
20 // Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
21 // MA 02110-1301, USA.
22 
23 // We implement a subset of the original GNU ld linker script language
24 // for compatibility.  The goal is not to implement the entire
25 // language.  It is merely to implement enough to handle common uses.
26 // In particular we need to handle /usr/lib/libc.so on a typical
27 // GNU/Linux system, and we want to handle linker scripts used by the
28 // Linux kernel build.
29 
30 #ifndef GOLD_SCRIPT_H
31 #define GOLD_SCRIPT_H
32 
33 #include <cstdio>
34 #include <string>
35 #include <vector>
36 
37 #include "elfcpp.h"
38 #include "script-sections.h"
39 
40 namespace gold
41 {
42 
// Forward declarations of the types which this header refers to only
// through pointers and references.  Dirsearch, Output_section and
// Symbol are declared here as well, so that the declarations below
// which name them do not depend on which other headers happen to
// have been included first.

class General_options;
class Command_line;
class Symbol_table;
class Symbol;
class Layout;
class Mapfile;
class Dirsearch;
class Input_argument;
class Input_arguments;
class Input_objects;
class Input_group;
class Input_file;
class Output_section;
class Output_segment;
class Task_token;
class Workqueue;
struct Version_dependency_list;
struct Version_expression_list;
struct Version_tree;
struct Version_expression;
class Lazy_demangler;
class Incremental_script_entry;
62 
63 // This class represents an expression in a linker script.
64 
65 class Expression
66 {
67  protected:
68   // These should only be created by child classes.
69   Expression()
70   { }
71 
72  public:
73   virtual ~Expression()
74   { }
75 
76   // Return the value of the expression which is not permitted to
77   // refer to the dot symbol.  CHECK_ASSERTIONS is true if we should
78   // check whether assertions are true.
79   uint64_t
80   eval(const Symbol_table*, const Layout*, bool check_assertions);
81 
82   // Return the value of an expression which is permitted to refer to
83   // the dot symbol.  DOT_VALUE is the absolute value of the dot
84   // symbol.  DOT_SECTION is the section in which dot is defined; it
85   // should be NULL if the dot symbol has an absolute value (e.g., is
86   // defined in a SECTIONS clause outside of any output section
87   // definition).  This sets *RESULT_SECTION to indicate where the
88   // value is defined.  If the value is absolute *RESULT_SECTION will
89   // be NULL.  Note that the returned value is still an absolute
90   // value; to get a section relative value the caller must subtract
91   // the section address.  If RESULT_ALIGNMENT is not NULL, this sets
92   // *RESULT_ALIGNMENT to the alignment of the value of that alignment
93   // is larger than *RESULT_ALIGNMENT; this will only be non-zero if
94   // this is an ALIGN expression.  If IS_SECTION_DOT_ASSIGMENT is true,
95   // we are evaluating an assignment to dot within an output section,
96   // and an absolute value should be interpreted as an offset within
97   // the section.
98   uint64_t
99   eval_with_dot(const Symbol_table*, const Layout*, bool check_assertions,
100 		uint64_t dot_value, Output_section* dot_section,
101 		Output_section** result_section, uint64_t* result_alignment,
102 		bool is_section_dot_assignment);
103 
104   // Return the value of an expression which may or may not be
105   // permitted to refer to the dot symbol, depending on
106   // is_dot_available.  If IS_SECTION_DOT_ASSIGMENT is true,
107   // we are evaluating an assignment to dot within an output section,
108   // and an absolute value should be interpreted as an offset within
109   // the section.
110   uint64_t
111   eval_maybe_dot(const Symbol_table*, const Layout*, bool check_assertions,
112 		 bool is_dot_available, uint64_t dot_value,
113 		 Output_section* dot_section,
114 		 Output_section** result_section, uint64_t* result_alignment,
115 		 elfcpp::STT* type, elfcpp::STV* vis, unsigned char* nonvis,
116 		 bool is_section_dot_assignment, bool* is_valid_pointer);
117 
118   // Print the expression to the FILE.  This is for debugging.
119   virtual void
120   print(FILE*) const = 0;
121 
122  protected:
123   struct Expression_eval_info;
124 
125  public:
126   // Compute the value of the expression (implemented by child class).
127   // This is public rather than protected because it is called
128   // directly by children of Expression on other Expression objects.
129   virtual uint64_t
130   value(const Expression_eval_info*) = 0;
131 
132   // Sets all symbols used in expressions as seen in a real ELF object.
133   virtual void
134   set_expr_sym_in_real_elf(Symbol_table*) const
135   { return; }
136 
137  private:
138   // May not be copied.
139   Expression(const Expression&);
140   Expression& operator=(const Expression&);
141 };
142 
143 // Version_script_info stores information parsed from the version
144 // script, either provided by --version-script or as part of a linker
145 // script.  A single Version_script_info object per target is owned by
146 // Script_options.
147 
148 class Version_script_info
149 {
150  public:
151   // The languages which can be specified in a versionn script.
152   enum Language
153   {
154     LANGUAGE_C,		// No demangling.
155     LANGUAGE_CXX,	// C++ demangling.
156     LANGUAGE_JAVA,	// Java demangling.
157     LANGUAGE_COUNT
158   };
159 
160   Version_script_info();
161 
162   ~Version_script_info();
163 
164   // Clear everything.
165   void
166   clear();
167 
168   // Finalize the version control information.
169   void
170   finalize();
171 
172   // Return whether the information is finalized.
173   bool
174   is_finalized() const
175   { return this->is_finalized_; }
176 
177   // Return whether any version were defined in the version script.
178   bool
179   empty() const
180   { return this->version_trees_.empty(); }
181 
182   // If there is a version associated with SYMBOL, return true, and
183   // set *VERSION to the version, and *IS_GLOBAL to whether the symbol
184   // should be global.  Otherwise, return false.
185   bool
186   get_symbol_version(const char* symbol, std::string* version,
187 		     bool* is_global) const;
188 
189   // Return whether this symbol matches the local: section of some
190   // version.
191   bool
192   symbol_is_local(const char* symbol) const
193   {
194     bool is_global;
195     return (this->get_symbol_version(symbol, NULL, &is_global)
196 	    && !is_global);
197   }
198 
199   // Return the names of versions defined in the version script.
200   std::vector<std::string>
201   get_versions() const;
202 
203   // Return the list of dependencies for this version.
204   std::vector<std::string>
205   get_dependencies(const char* version) const;
206 
207   // The following functions should only be used by the bison helper
208   // functions.  They allocate new structs whose memory belongs to
209   // Version_script_info.  The bison functions copy the information
210   // from the version script into these structs.
211   struct Version_dependency_list*
212   allocate_dependency_list();
213 
214   struct Version_expression_list*
215   allocate_expression_list();
216 
217   struct Version_tree*
218   allocate_version_tree();
219 
220   // Build the lookup tables after all data have been read.
221   void
222   build_lookup_tables();
223 
224   // Give an error if there are any unmatched names in the version
225   // script.
226   void
227   check_unmatched_names(const Symbol_table*) const;
228 
229   // Print contents to the FILE.  This is for debugging.
230   void
231   print(FILE*) const;
232 
233  private:
234   void
235   print_expression_list(FILE* f, const Version_expression_list*) const;
236 
237   bool
238   get_symbol_version_helper(const char* symbol,
239 			    bool check_global,
240 			    std::string* pversion) const;
241 
242   // Fast lookup information for a given language.
243 
244   // We map from exact match strings to Version_tree's.  Historically
245   // version scripts sometimes have the same symbol multiple times,
246   // which is ambiguous.  We warn about that case by storing the
247   // second Version_tree we see.
248   struct Version_tree_match
249   {
250     Version_tree_match(const Version_tree* r, bool ig,
251 		       const Version_expression* e)
252       : real(r), is_global(ig), expression(e), ambiguous(NULL)
253     { }
254 
255     // The Version_tree that we return.
256     const Version_tree* real;
257     // True if this is a global match for the REAL member, false if it
258     // is a local match.
259     bool is_global;
260     // Point back to the Version_expression for which we created this
261     // match.
262     const Version_expression* expression;
263     // If not NULL, another Version_tree that defines the symbol.
264     const Version_tree* ambiguous;
265   };
266 
267   // Map from an exact match string to a Version_tree.
268 
269   typedef Unordered_map<std::string, Version_tree_match> Exact;
270 
271   // Fast lookup information for a glob pattern.
272   struct Glob
273   {
274     Glob()
275       : expression(NULL), version(NULL), is_global(false)
276     { }
277 
278     Glob(const Version_expression* e, const Version_tree* v, bool ig)
279       : expression(e), version(v), is_global(ig)
280     { }
281 
282     // A pointer to the version expression holding the pattern to
283     // match and the language to use for demangling the symbol before
284     // doing the match.
285     const Version_expression* expression;
286     // The Version_tree we use if this pattern matches.
287     const Version_tree* version;
288     // True if this is a global symbol.
289     bool is_global;
290   };
291 
292   typedef std::vector<Glob> Globs;
293 
294   bool
295   unquote(std::string*) const;
296 
297   void
298   add_exact_match(const std::string&, const Version_tree*, bool is_global,
299 		  const Version_expression*, Exact*);
300 
301   void
302   build_expression_list_lookup(const Version_expression_list*,
303 			       const Version_tree*, bool);
304 
305   const char*
306   get_name_to_match(const char*, int,
307 		    Lazy_demangler*, Lazy_demangler*) const;
308 
309   // All the version dependencies we allocate.
310   std::vector<Version_dependency_list*> dependency_lists_;
311   // All the version expressions we allocate.
312   std::vector<Version_expression_list*> expression_lists_;
313   // The list of versions.
314   std::vector<Version_tree*> version_trees_;
315   // Exact matches for global symbols, by language.
316   Exact* exact_[LANGUAGE_COUNT];
317   // A vector of glob patterns mapping to Version_trees.
318   Globs globs_;
319   // The default version to use, if there is one.  This is from a
320   // pattern of "*".
321   const Version_tree* default_version_;
322   // True if the default version is global.
323   bool default_is_global_;
324   // Whether this has been finalized.
325   bool is_finalized_;
326 };
327 
328 // This class manages assignments to symbols.  These can appear in
329 // three different locations in scripts: outside of a SECTIONS clause,
330 // within a SECTIONS clause, and within an output section definition
331 // within a SECTIONS clause.  This can also appear on the command line
332 // via the --defsym command line option.
333 
334 class Symbol_assignment
335 {
336  public:
337   Symbol_assignment(const char* name, size_t namelen, bool is_defsym,
338 		    Expression* val, bool provide, bool hidden)
339     : name_(name, namelen), val_(val), is_defsym_(is_defsym),
340       provide_(provide), hidden_(hidden), sym_(NULL)
341   { }
342 
343   // Add the symbol to the symbol table.
344   void
345   add_to_table(Symbol_table*);
346 
347   // Finalize the symbol value.
348   void
349   finalize(Symbol_table*, const Layout*);
350 
351   bool
352   is_defsym() const
353   { return is_defsym_; }
354 
355   Expression *
356   value() const
357   { return val_; }
358 
359   // Finalize the symbol value when it can refer to the dot symbol.
360   void
361   finalize_with_dot(Symbol_table*, const Layout*, uint64_t dot_value,
362 		    Output_section* dot_section);
363 
364   // Set the symbol value, but only if the value is absolute or relative to
365   // DOT_SECTION.  This is used while processing a SECTIONS clause.
366   // We assume that dot is an absolute value here.  We do not check assertions.
367   void
368   set_if_absolute(Symbol_table*, const Layout*, bool is_dot_available,
369 		  uint64_t dot_value, Output_section* dot_section);
370 
371   const std::string&
372   name() const
373   { return this->name_; }
374 
375   // Print the assignment to the FILE.  This is for debugging.
376   void
377   print(FILE*) const;
378 
379  private:
380   // Shared by finalize and finalize_with_dot.
381   void
382   finalize_maybe_dot(Symbol_table*, const Layout*, bool is_dot_available,
383 		     uint64_t dot_value, Output_section* dot_section);
384 
385   // Sized version of finalize.
386   template<int size>
387   void
388   sized_finalize(Symbol_table*, const Layout*, bool is_dot_available,
389 		 uint64_t dot_value, Output_section*);
390 
391   // Symbol name.
392   std::string name_;
393   // Expression to assign to symbol.
394   Expression* val_;
395   // True if this symbol is defined by a --defsym, false if it is
396   // defined in a linker script.
397   bool is_defsym_;
398   // Whether the assignment should be provided (only set if there is
399   // an undefined reference to the symbol.
400   bool provide_;
401   // Whether the assignment should be hidden.
402   bool hidden_;
403   // The entry in the symbol table.
404   Symbol* sym_;
405 };
406 
407 // This class manages assertions in linker scripts.  These can appear
408 // in all the places where a Symbol_assignment can appear.
409 
410 class Script_assertion
411 {
412  public:
413   Script_assertion(Expression* check, const char* message,
414 		   size_t messagelen)
415     : check_(check), message_(message, messagelen)
416   { }
417 
418   // Check the assertion.
419   void
420   check(const Symbol_table*, const Layout*);
421 
422   // Print the assertion to the FILE.  This is for debugging.
423   void
424   print(FILE*) const;
425 
426  private:
427   // The expression to check.
428   Expression* check_;
429   // The message to issue if the expression fails.
430   std::string message_;
431 };
432 
433 // We can read a linker script in two different contexts: when
434 // initially parsing the command line, and when we find an input file
435 // which is actually a linker script.  Also some of the data which can
436 // be set by a linker script can also be set via command line options
437 // like -e and --defsym.  This means that we have a type of data which
438 // can be set both during command line option parsing and while
439 // reading input files.  We store that data in an instance of this
440 // object.  We will keep pointers to that instance in both the
441 // Command_line and Layout objects.
442 
443 class Script_options
444 {
445  public:
446   Script_options();
447 
448   // Add a symbol to be defined.
449   void
450   add_symbol_assignment(const char* name, size_t length, bool is_defsym,
451 			Expression* value, bool provide, bool hidden);
452 
453   // Look for an assigned symbol.
454   bool
455   is_pending_assignment(const char* name);
456 
457   // Add a reference to a symbol.
458   void
459   add_symbol_reference(const char* name, size_t length);
460 
461   // Add an assertion.
462   void
463   add_assertion(Expression* check, const char* message, size_t messagelen);
464 
465   // Define a symbol from the command line.
466   bool
467   define_symbol(const char* definition);
468 
469   // Populates the set with symbol names used in LHS of defsym.
470   void
471   find_defsym_defs(Unordered_set<std::string>&);
472 
473   // Set symbols used in defsym expressions as seen in a real ELF object.
474   void set_defsym_uses_in_real_elf(Symbol_table*) const;
475 
476   // Create sections required by any linker scripts.
477   void
478   create_script_sections(Layout*);
479 
480   // Add all symbol definitions to the symbol table.
481   void
482   add_symbols_to_table(Symbol_table*);
483 
484   // Used to iterate over symbols which are referenced in expressions
485   // but not defined.
486   typedef Unordered_set<std::string>::const_iterator referenced_const_iterator;
487 
488   referenced_const_iterator
489   referenced_begin() const
490   { return this->symbol_references_.begin(); }
491 
492   referenced_const_iterator
493   referenced_end() const
494   { return this->symbol_references_.end(); }
495 
496   // Return whether a symbol is referenced but not defined.
497   bool
498   is_referenced(const std::string& name) const
499   {
500     return (this->symbol_references_.find(name)
501 	    != this->symbol_references_.end());
502   }
503 
504   // Return whether there are any symbols which were referenced but
505   // not defined.
506   bool
507   any_unreferenced() const
508   { return !this->symbol_references_.empty(); }
509 
510   // Finalize the symbol values.  Also check assertions.
511   void
512   finalize_symbols(Symbol_table*, const Layout*);
513 
514   // Version information parsed from a version script.  Everything
515   // else has a pointer to this object.
516   Version_script_info*
517   version_script_info()
518   { return &this->version_script_info_; }
519 
520   const Version_script_info*
521   version_script_info() const
522   { return &this->version_script_info_; }
523 
524   // A SECTIONS clause parsed from a linker script.  Everything else
525   // has a pointer to this object.
526   Script_sections*
527   script_sections()
528   { return &this->script_sections_; }
529 
530   const Script_sections*
531   script_sections() const
532   { return &this->script_sections_; }
533 
534   // Whether we saw a SECTIONS clause.
535   bool
536   saw_sections_clause() const
537   { return this->script_sections_.saw_sections_clause(); }
538 
539   // Whether we saw a PHDRS clause.
540   bool
541   saw_phdrs_clause() const
542   { return this->script_sections_.saw_phdrs_clause(); }
543 
544   // Set section addresses using a SECTIONS clause.  Return the
545   // segment which should hold the file header and segment headers;
546   // this may return NULL, in which case the headers are not in a
547   // loadable segment.
548   Output_segment*
549   set_section_addresses(Symbol_table*, Layout*);
550 
551   // Print the script to the FILE.  This is for debugging.
552   void
553   print(FILE*) const;
554 
555  private:
556   // We keep a list of symbol assignments which occur outside of a
557   // SECTIONS clause.
558   typedef std::vector<Symbol_assignment*> Symbol_assignments;
559 
560   // We keep a list of all assertions which occur outside of a
561   // SECTIONS clause.
562   typedef std::vector<Script_assertion*> Assertions;
563 
564   // The entry address.  This will be empty if not set.
565   std::string entry_;
566   // Symbols to set.
567   Symbol_assignments symbol_assignments_;
568   // Symbols defined in an expression, for faster lookup.
569   Unordered_set<std::string> symbol_definitions_;
570   // Symbols referenced in an expression.
571   Unordered_set<std::string> symbol_references_;
572   // Assertions to check.
573   Assertions assertions_;
574   // Version information parsed from a version script.
575   Version_script_info version_script_info_;
576   // Information from any SECTIONS clauses.
577   Script_sections script_sections_;
578 };
579 
580 // FILE was found as an argument on the command line, but was not
581 // recognized as an ELF file.  Try to read it as a script.  Return
582 // true if the file was handled.  This has to handle /usr/lib/libc.so
583 // on a GNU/Linux system.  *USED_NEXT_BLOCKER is set to indicate
584 // whether the function took over NEXT_BLOCKER.
585 
586 bool
587 read_input_script(Workqueue*, Symbol_table*, Layout*, Dirsearch*, int,
588 		  Input_objects*, Mapfile*, Input_group*,
589 		  const Input_argument*, Input_file*,
590 		  Task_token* next_blocker, bool* used_next_blocker);
591 
// FILENAME was found as an argument to --script (-T).  Read it as a
// script, and execute its contents immediately.
// NOTE(review): the meaning of the return value is not stated here;
// presumably true means that the script was read successfully --
// confirm against the definition.

bool
read_commandline_script(const char* filename, Command_line* cmdline);
597 
// FILENAME was found as an argument to --version-script.  Read it as
// a version script, and store its contents in
// cmdline->script_options()->version_script_info().
// NOTE(review): the meaning of the return value is not stated here;
// presumably true means that the script was read successfully --
// confirm against the definition.

bool
read_version_script(const char* filename, Command_line* cmdline);
604 
// FILENAME was found as an argument to --dynamic-list.  Read it as a
// version script (actually, as a versym_node from a version script),
// and store its contents in DYNAMIC_LIST.
// NOTE(review): the meaning of the return value is not stated here;
// presumably true means that the list was read successfully --
// confirm against the definition.

bool
read_dynamic_list(const char* filename, Command_line* cmdline,
                  Script_options* dynamic_list);
612 
613 } // End namespace gold.
614 
615 #endif // !defined(GOLD_SCRIPT_H)
616