1 // merge.h -- handle section merging for gold  -*- C++ -*-
2 
3 // Copyright (C) 2006-2016 Free Software Foundation, Inc.
4 // Written by Ian Lance Taylor <iant@google.com>.
5 
6 // This file is part of gold.
7 
8 // This program is free software; you can redistribute it and/or modify
9 // it under the terms of the GNU General Public License as published by
10 // the Free Software Foundation; either version 3 of the License, or
11 // (at your option) any later version.
12 
13 // This program is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 // GNU General Public License for more details.
17 
18 // You should have received a copy of the GNU General Public License
19 // along with this program; if not, write to the Free Software
20 // Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
21 // MA 02110-1301, USA.
22 
23 #ifndef GOLD_MERGE_H
24 #define GOLD_MERGE_H
25 
26 #include <climits>
27 #include <map>
28 #include <vector>
29 
30 #include "stringpool.h"
31 #include "output.h"
32 
33 namespace gold
34 {
35 
36 // For each object with merge sections, we store an Object_merge_map.
37 // This is used to map locations in input sections to a merged output
38 // section.  The output section itself is not recorded here--it can be
39 // found in the output_sections_ field of the Object.
40 
41 class Object_merge_map
42 {
43  public:
44   Object_merge_map()
45     : section_merge_maps_()
46   { }
47 
48   ~Object_merge_map();
49 
50   // Add a mapping for MERGE_MAP, for the bytes from OFFSET to OFFSET
51   // + LENGTH in the input section SHNDX to OUTPUT_OFFSET in the
52   // output section.  An OUTPUT_OFFSET of -1 means that the bytes are
53   // discarded.  OUTPUT_OFFSET is relative to the start of the merged
54   // data in the output section.
55   void
56   add_mapping(const Output_section_data*, unsigned int shndx,
57               section_offset_type offset, section_size_type length,
58               section_offset_type output_offset);
59 
60   // Get the output offset for an input address.  MERGE_MAP is the map
61   // we are looking for, or NULL if we don't care.  The input address
62   // is at offset OFFSET in section SHNDX.  This sets *OUTPUT_OFFSET
63   // to the offset in the output section; this will be -1 if the bytes
64   // are not being copied to the output.  This returns true if the
65   // mapping is known, false otherwise.  *OUTPUT_OFFSET is relative to
66   // the start of the merged data in the output section.
67   bool
68   get_output_offset(unsigned int shndx,
69 		    section_offset_type offset,
70 		    section_offset_type* output_offset);
71 
72   const Output_section_data*
73   find_merge_section(unsigned int shndx) const;
74 
75   // Initialize an mapping from input offsets to output addresses for
76   // section SHNDX.  STARTING_ADDRESS is the output address of the
77   // merged section.
78   template<int size>
79   void
80   initialize_input_to_output_map(
81       unsigned int shndx,
82       typename elfcpp::Elf_types<size>::Elf_Addr starting_address,
83       Unordered_map<section_offset_type,
84 		    typename elfcpp::Elf_types<size>::Elf_Addr>*);
85 
86   // Map input section offsets to a length and an output section
87   // offset.  An output section offset of -1 means that this part of
88   // the input section is being discarded.
89   struct Input_merge_entry
90   {
91     // The offset in the input section.
92     section_offset_type input_offset;
93     // The length.
94     section_size_type length;
95     // The offset in the output section.
96     section_offset_type output_offset;
97   };
98 
99   // A list of entries for a particular input section.
100   struct Input_merge_map
101   {
102     void add_mapping(section_offset_type input_offset, section_size_type length,
103                      section_offset_type output_offset);
104 
105     typedef std::vector<Input_merge_entry> Entries;
106 
107     // We store these with the Relobj, and we look them up by input
108     // section.  It is possible to have two different merge maps
109     // associated with a single output section.  For example, this
110     // happens routinely with .rodata, when merged string constants
111     // and merged fixed size constants are both put into .rodata.  The
112     // output offset that we store is not the offset from the start of
113     // the output section; it is the offset from the start of the
114     // merged data in the output section.  That means that the caller
115     // is going to add the offset of the merged data within the output
116     // section, which means that the caller needs to know which set of
117     // merged data it found the entry in.  So it's not enough to find
118     // this data based on the input section and the output section; we
119     // also have to find it based on a set of merged data in the
120     // output section.  In order to verify that we are looking at the
121     // right data, we store a pointer to the Merge_map here, and we
122     // pass in a pointer when looking at the data.  If we are asked to
123     // look up information for a different Merge_map, we report that
124     // we don't have it, rather than trying a lookup and returning an
125     // answer which will receive the wrong offset.
126     const Output_section_data* output_data;
127     // The list of mappings.
128     Entries entries;
129     // Whether the ENTRIES field is sorted by input_offset.
130     bool sorted;
131 
132     Input_merge_map()
133       : output_data(NULL), entries(), sorted(true)
134     { }
135   };
136 
137   // Get or make the Input_merge_map to use for the section SHNDX
138   // with MERGE_MAP.
139   Input_merge_map*
140   get_or_make_input_merge_map(const Output_section_data* merge_map,
141                               unsigned int shndx);
142 
143   private:
144   // A less-than comparison routine for Input_merge_entry.
145   struct Input_merge_compare
146   {
147     bool
148     operator()(const Input_merge_entry& i1, const Input_merge_entry& i2) const
149     { return i1.input_offset < i2.input_offset; }
150   };
151 
152   // Map input section indices to merge maps.
153   typedef std::vector<std::pair<unsigned int, Input_merge_map*> >
154       Section_merge_maps;
155 
156   // Return a pointer to the Input_merge_map to use for the input
157   // section SHNDX, or NULL.
158   const Input_merge_map*
159   get_input_merge_map(unsigned int shndx) const;
160 
161   Input_merge_map *
162   get_input_merge_map(unsigned int shndx) {
163     return const_cast<Input_merge_map *>(static_cast<const Object_merge_map *>(
164                                              this)->get_input_merge_map(shndx));
165   }
166 
167   Section_merge_maps section_merge_maps_;
168 };
169 
170 // A general class for SHF_MERGE data, to hold functions shared by
171 // fixed-size constant data and string data.
172 
173 class Output_merge_base : public Output_section_data
174 {
175  public:
176   Output_merge_base(uint64_t entsize, uint64_t addralign)
177     : Output_section_data(addralign), entsize_(entsize),
178       keeps_input_sections_(false), first_relobj_(NULL), first_shndx_(-1),
179       input_sections_()
180   { }
181 
182   // Return the entry size.
183   uint64_t
184   entsize() const
185   { return this->entsize_; }
186 
187   // Whether this is a merge string section.  This is only true of
188   // Output_merge_string.
189   bool
190   is_string()
191   { return this->do_is_string(); }
192 
193   // Whether this keeps input sections.
194   bool
195   keeps_input_sections() const
196   { return this->keeps_input_sections_; }
197 
198   // Set the keeps-input-sections flag.  This is virtual so that sub-classes
199   // can perform additional checks.
200   void
201   set_keeps_input_sections()
202   { this->do_set_keeps_input_sections(); }
203 
204   // Return the object of the first merged input section.  This used
205   // for script processing.  This is NULL if merge section is empty.
206   Relobj*
207   first_relobj() const
208   { return this->first_relobj_; }
209 
210   // Return the section index of the first merged input section.  This
211   // is used for script processing.  This is valid only if merge section
212   // is not valid.
213   unsigned int
214   first_shndx() const
215   {
216     gold_assert(this->first_relobj_ != NULL);
217     return this->first_shndx_;
218   }
219 
220   // Set of merged input sections.
221   typedef Unordered_set<Section_id, Section_id_hash> Input_sections;
222 
223   // Beginning of merged input sections.
224   Input_sections::const_iterator
225   input_sections_begin() const
226   {
227     gold_assert(this->keeps_input_sections_);
228     return this->input_sections_.begin();
229   }
230 
231   // Beginning of merged input sections.
232   Input_sections::const_iterator
233   input_sections_end() const
234   {
235     gold_assert(this->keeps_input_sections_);
236     return this->input_sections_.end();
237   }
238 
239  protected:
240   // Return the output offset for an input offset.
241   bool
242   do_output_offset(const Relobj* object, unsigned int shndx,
243 		   section_offset_type offset,
244 		   section_offset_type* poutput) const;
245 
246   // This may be overridden by the child class.
247   virtual bool
248   do_is_string()
249   { return false; }
250 
251   // This may be overridden by the child class.
252   virtual void
253   do_set_keeps_input_sections()
254   { this->keeps_input_sections_ = true; }
255 
256   // Record the merged input section for script processing.
257   void
258   record_input_section(Relobj* relobj, unsigned int shndx);
259 
260  private:
261   // The entry size.  For fixed-size constants, this is the size of
262   // the constants.  For strings, this is the size of a character.
263   uint64_t entsize_;
264   // Whether we keep input sections.
265   bool keeps_input_sections_;
266   // Object of the first merged input section.  We use this for script
267   // processing.
268   Relobj* first_relobj_;
269   // Section index of the first merged input section.
270   unsigned int first_shndx_;
271   // Input sections.  We only keep them is keeps_input_sections_ is true.
272   Input_sections input_sections_;
273 };
274 
275 // Handle SHF_MERGE sections with fixed-size constant data.
276 
277 class Output_merge_data : public Output_merge_base
278 {
279  public:
280   Output_merge_data(uint64_t entsize, uint64_t addralign)
281     : Output_merge_base(entsize, addralign), p_(NULL), len_(0), alc_(0),
282       input_count_(0),
283       hashtable_(128, Merge_data_hash(this), Merge_data_eq(this))
284   { }
285 
286  protected:
287   // Add an input section.
288   bool
289   do_add_input_section(Relobj* object, unsigned int shndx);
290 
291   // Set the final data size.
292   void
293   set_final_data_size();
294 
295   // Write the data to the file.
296   void
297   do_write(Output_file*);
298 
299   // Write the data to a buffer.
300   void
301   do_write_to_buffer(unsigned char*);
302 
303   // Write to a map file.
304   void
305   do_print_to_mapfile(Mapfile* mapfile) const
306   { mapfile->print_output_data(this, _("** merge constants")); }
307 
308   // Print merge stats to stderr.
309   void
310   do_print_merge_stats(const char* section_name);
311 
312   // Set keeps-input-sections flag.
313   void
314   do_set_keeps_input_sections()
315   {
316     gold_assert(this->input_count_ == 0);
317     Output_merge_base::do_set_keeps_input_sections();
318   }
319 
320  private:
321   // We build a hash table of the fixed-size constants.  Each constant
322   // is stored as a pointer into the section data we are accumulating.
323 
324   // A key in the hash table.  This is an offset in the section
325   // contents we are building.
326   typedef section_offset_type Merge_data_key;
327 
328   // Compute the hash code.  To do this we need a pointer back to the
329   // object holding the data.
330   class Merge_data_hash
331   {
332    public:
333     Merge_data_hash(const Output_merge_data* pomd)
334       : pomd_(pomd)
335     { }
336 
337     size_t
338     operator()(Merge_data_key) const;
339 
340    private:
341     const Output_merge_data* pomd_;
342   };
343 
344   friend class Merge_data_hash;
345 
346   // Compare two entries in the hash table for equality.  To do this
347   // we need a pointer back to the object holding the data.  Note that
348   // we now have a pointer to the object stored in two places in the
349   // hash table.  Fixing this would require specializing the hash
350   // table, which would be hard to do portably.
351   class Merge_data_eq
352   {
353    public:
354     Merge_data_eq(const Output_merge_data* pomd)
355       : pomd_(pomd)
356     { }
357 
358     bool
359     operator()(Merge_data_key k1, Merge_data_key k2) const;
360 
361    private:
362     const Output_merge_data* pomd_;
363   };
364 
365   friend class Merge_data_eq;
366 
367   // The type of the hash table.
368   typedef Unordered_set<Merge_data_key, Merge_data_hash, Merge_data_eq>
369     Merge_data_hashtable;
370 
371   // Given a hash table key, which is just an offset into the section
372   // data, return a pointer to the corresponding constant.
373   const unsigned char*
374   constant(Merge_data_key k) const
375   {
376     gold_assert(k >= 0 && k < static_cast<section_offset_type>(this->len_));
377     return this->p_ + k;
378   }
379 
380   // Add a constant to the output.
381   void
382   add_constant(const unsigned char*);
383 
384   // The accumulated data.
385   unsigned char* p_;
386   // The length of the accumulated data.
387   section_size_type len_;
388   // The size of the allocated buffer.
389   section_size_type alc_;
390   // The number of entries seen in input files.
391   size_t input_count_;
392   // The hash table.
393   Merge_data_hashtable hashtable_;
394 };
395 
396 // Handle SHF_MERGE sections with string data.  This is a template
397 // based on the type of the characters in the string.
398 
399 template<typename Char_type>
400 class Output_merge_string : public Output_merge_base
401 {
402  public:
403   Output_merge_string(uint64_t addralign)
404     : Output_merge_base(sizeof(Char_type), addralign), stringpool_(addralign),
405       merged_strings_lists_(), input_count_(0), input_size_(0)
406   {
407     this->stringpool_.set_no_zero_null();
408   }
409 
410  protected:
411   // Add an input section.
412   bool
413   do_add_input_section(Relobj* object, unsigned int shndx);
414 
415   // Do all the final processing after the input sections are read in.
416   // Returns the final data size.
417   section_size_type
418   finalize_merged_data();
419 
420   // Set the final data size.
421   void
422   set_final_data_size();
423 
424   // Write the data to the file.
425   void
426   do_write(Output_file*);
427 
428   // Write the data to a buffer.
429   void
430   do_write_to_buffer(unsigned char*);
431 
432   // Write to a map file.
433   void
434   do_print_to_mapfile(Mapfile* mapfile) const
435   { mapfile->print_output_data(this, _("** merge strings")); }
436 
437   // Print merge stats to stderr.
438   void
439   do_print_merge_stats(const char* section_name);
440 
441   // Writes the stringpool to a buffer.
442   void
443   stringpool_to_buffer(unsigned char* buffer, section_size_type buffer_size)
444   { this->stringpool_.write_to_buffer(buffer, buffer_size); }
445 
446   // Clears all the data in the stringpool, to save on memory.
447   void
448   clear_stringpool()
449   { this->stringpool_.clear(); }
450 
451   // Whether this is a merge string section.
452   virtual bool
453   do_is_string()
454   { return true; }
455 
456   // Set keeps-input-sections flag.
457   void
458   do_set_keeps_input_sections()
459   {
460     gold_assert(this->input_count_ == 0);
461     Output_merge_base::do_set_keeps_input_sections();
462   }
463 
464  private:
465   // The name of the string type, for stats.
466   const char*
467   string_name();
468 
469   // As we see input sections, we build a mapping from object, section
470   // index and offset to strings.
471   struct Merged_string
472   {
473     // The offset in the input section.
474     section_offset_type offset;
475     // The key in the Stringpool.
476     Stringpool::Key stringpool_key;
477 
478     Merged_string(section_offset_type offseta, Stringpool::Key stringpool_keya)
479       : offset(offseta), stringpool_key(stringpool_keya)
480     { }
481   };
482 
483   typedef std::vector<Merged_string> Merged_strings;
484 
485   struct Merged_strings_list
486   {
487     // The input object where the strings were found.
488     Relobj* object;
489     // The input section in the input object.
490     unsigned int shndx;
491     // The list of merged strings.
492     Merged_strings merged_strings;
493 
494     Merged_strings_list(Relobj* objecta, unsigned int shndxa)
495       : object(objecta), shndx(shndxa), merged_strings()
496     { }
497   };
498 
499   typedef std::vector<Merged_strings_list*> Merged_strings_lists;
500 
501   // As we see the strings, we add them to a Stringpool.
502   Stringpool_template<Char_type> stringpool_;
503   // Map from a location in an input object to an entry in the
504   // Stringpool.
505   Merged_strings_lists merged_strings_lists_;
506   // The number of entries seen in input files.
507   size_t input_count_;
508   // The total size of input sections.
509   size_t input_size_;
510 };
511 
512 } // End namespace gold.
513 
514 #endif // !defined(GOLD_MERGE_H)
515