1 // merge.h -- handle section merging for gold -*- C++ -*- 2 3 // Copyright (C) 2006-2021 Free Software Foundation, Inc. 4 // Written by Ian Lance Taylor <iant@google.com>. 5 6 // This file is part of gold. 7 8 // This program is free software; you can redistribute it and/or modify 9 // it under the terms of the GNU General Public License as published by 10 // the Free Software Foundation; either version 3 of the License, or 11 // (at your option) any later version. 12 13 // This program is distributed in the hope that it will be useful, 14 // but WITHOUT ANY WARRANTY; without even the implied warranty of 15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 // GNU General Public License for more details. 17 18 // You should have received a copy of the GNU General Public License 19 // along with this program; if not, write to the Free Software 20 // Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, 21 // MA 02110-1301, USA. 22 23 #ifndef GOLD_MERGE_H 24 #define GOLD_MERGE_H 25 26 #include <climits> 27 #include <map> 28 #include <vector> 29 30 #include "stringpool.h" 31 #include "output.h" 32 33 namespace gold 34 { 35 36 // For each object with merge sections, we store an Object_merge_map. 37 // This is used to map locations in input sections to a merged output 38 // section. The output section itself is not recorded here--it can be 39 // found in the output_sections_ field of the Object. 40 41 class Object_merge_map 42 { 43 public: Object_merge_map()44 Object_merge_map() 45 : section_merge_maps_() 46 { } 47 48 ~Object_merge_map(); 49 50 // Add a mapping for MERGE_MAP, for the bytes from OFFSET to OFFSET 51 // + LENGTH in the input section SHNDX to OUTPUT_OFFSET in the 52 // output section. An OUTPUT_OFFSET of -1 means that the bytes are 53 // discarded. OUTPUT_OFFSET is relative to the start of the merged 54 // data in the output section. 55 void 56 add_mapping(const Output_section_data*, unsigned int shndx, 57 section_offset_type offset, section_size_type length, 58 section_offset_type output_offset); 59 60 // Get the output offset for an input address. MERGE_MAP is the map 61 // we are looking for, or NULL if we don't care. The input address 62 // is at offset OFFSET in section SHNDX. This sets *OUTPUT_OFFSET 63 // to the offset in the output section; this will be -1 if the bytes 64 // are not being copied to the output. This returns true if the 65 // mapping is known, false otherwise. *OUTPUT_OFFSET is relative to 66 // the start of the merged data in the output section. 67 bool 68 get_output_offset(unsigned int shndx, 69 section_offset_type offset, 70 section_offset_type* output_offset); 71 72 const Output_section_data* 73 find_merge_section(unsigned int shndx) const; 74 75 // Initialize an mapping from input offsets to output addresses for 76 // section SHNDX. STARTING_ADDRESS is the output address of the 77 // merged section. 78 template<int size> 79 void 80 initialize_input_to_output_map( 81 unsigned int shndx, 82 typename elfcpp::Elf_types<size>::Elf_Addr starting_address, 83 Unordered_map<section_offset_type, 84 typename elfcpp::Elf_types<size>::Elf_Addr>*); 85 86 // Map input section offsets to a length and an output section 87 // offset. An output section offset of -1 means that this part of 88 // the input section is being discarded. 89 struct Input_merge_entry 90 { 91 // The offset in the input section. 92 section_offset_type input_offset; 93 // The length. 94 section_size_type length; 95 // The offset in the output section. 96 section_offset_type output_offset; 97 }; 98 99 // A list of entries for a particular input section. 100 struct Input_merge_map 101 { 102 void add_mapping(section_offset_type input_offset, section_size_type length, 103 section_offset_type output_offset); 104 105 typedef std::vector<Input_merge_entry> Entries; 106 107 // We store these with the Relobj, and we look them up by input 108 // section. It is possible to have two different merge maps 109 // associated with a single output section. For example, this 110 // happens routinely with .rodata, when merged string constants 111 // and merged fixed size constants are both put into .rodata. The 112 // output offset that we store is not the offset from the start of 113 // the output section; it is the offset from the start of the 114 // merged data in the output section. That means that the caller 115 // is going to add the offset of the merged data within the output 116 // section, which means that the caller needs to know which set of 117 // merged data it found the entry in. So it's not enough to find 118 // this data based on the input section and the output section; we 119 // also have to find it based on a set of merged data in the 120 // output section. In order to verify that we are looking at the 121 // right data, we store a pointer to the Merge_map here, and we 122 // pass in a pointer when looking at the data. If we are asked to 123 // look up information for a different Merge_map, we report that 124 // we don't have it, rather than trying a lookup and returning an 125 // answer which will receive the wrong offset. 126 const Output_section_data* output_data; 127 // The list of mappings. 128 Entries entries; 129 // Whether the ENTRIES field is sorted by input_offset. 130 bool sorted; 131 Input_merge_mapInput_merge_map132 Input_merge_map() 133 : output_data(NULL), entries(), sorted(true) 134 { } 135 }; 136 137 // Get or make the Input_merge_map to use for the section SHNDX 138 // with MERGE_MAP. 139 Input_merge_map* 140 get_or_make_input_merge_map(const Output_section_data* merge_map, 141 unsigned int shndx); 142 143 private: 144 // A less-than comparison routine for Input_merge_entry. 145 struct Input_merge_compare 146 { 147 bool operatorInput_merge_compare148 operator()(const Input_merge_entry& i1, const Input_merge_entry& i2) const 149 { return i1.input_offset < i2.input_offset; } 150 }; 151 152 // Map input section indices to merge maps. 153 typedef std::vector<std::pair<unsigned int, Input_merge_map*> > 154 Section_merge_maps; 155 156 // Return a pointer to the Input_merge_map to use for the input 157 // section SHNDX, or NULL. 158 const Input_merge_map* 159 get_input_merge_map(unsigned int shndx) const; 160 161 Input_merge_map * get_input_merge_map(unsigned int shndx)162 get_input_merge_map(unsigned int shndx) { 163 return const_cast<Input_merge_map *>(static_cast<const Object_merge_map *>( 164 this)->get_input_merge_map(shndx)); 165 } 166 167 Section_merge_maps section_merge_maps_; 168 }; 169 170 // A general class for SHF_MERGE data, to hold functions shared by 171 // fixed-size constant data and string data. 172 173 class Output_merge_base : public Output_section_data 174 { 175 public: Output_merge_base(uint64_t entsize,uint64_t addralign)176 Output_merge_base(uint64_t entsize, uint64_t addralign) 177 : Output_section_data(addralign), entsize_(entsize), 178 keeps_input_sections_(false), first_relobj_(NULL), first_shndx_(-1), 179 input_sections_() 180 { } 181 182 // Return the entry size. 183 uint64_t entsize()184 entsize() const 185 { return this->entsize_; } 186 187 // Whether this is a merge string section. This is only true of 188 // Output_merge_string. 189 bool is_string()190 is_string() 191 { return this->do_is_string(); } 192 193 // Whether this keeps input sections. 194 bool keeps_input_sections()195 keeps_input_sections() const 196 { return this->keeps_input_sections_; } 197 198 // Set the keeps-input-sections flag. This is virtual so that sub-classes 199 // can perform additional checks. 200 void set_keeps_input_sections()201 set_keeps_input_sections() 202 { this->do_set_keeps_input_sections(); } 203 204 // Return the object of the first merged input section. This used 205 // for script processing. This is NULL if merge section is empty. 206 Relobj* first_relobj()207 first_relobj() const 208 { return this->first_relobj_; } 209 210 // Return the section index of the first merged input section. This 211 // is used for script processing. This is valid only if merge section 212 // is not valid. 213 unsigned int first_shndx()214 first_shndx() const 215 { 216 gold_assert(this->first_relobj_ != NULL); 217 return this->first_shndx_; 218 } 219 220 // Set of merged input sections. 221 typedef Unordered_set<Section_id, Section_id_hash> Input_sections; 222 223 // Beginning of merged input sections. 224 Input_sections::const_iterator input_sections_begin()225 input_sections_begin() const 226 { 227 gold_assert(this->keeps_input_sections_); 228 return this->input_sections_.begin(); 229 } 230 231 // Beginning of merged input sections. 232 Input_sections::const_iterator input_sections_end()233 input_sections_end() const 234 { 235 gold_assert(this->keeps_input_sections_); 236 return this->input_sections_.end(); 237 } 238 239 protected: 240 // Return the output offset for an input offset. 241 bool 242 do_output_offset(const Relobj* object, unsigned int shndx, 243 section_offset_type offset, 244 section_offset_type* poutput) const; 245 246 // This may be overridden by the child class. 247 virtual bool do_is_string()248 do_is_string() 249 { return false; } 250 251 // This may be overridden by the child class. 252 virtual void do_set_keeps_input_sections()253 do_set_keeps_input_sections() 254 { this->keeps_input_sections_ = true; } 255 256 // Record the merged input section for script processing. 257 void 258 record_input_section(Relobj* relobj, unsigned int shndx); 259 260 private: 261 // The entry size. For fixed-size constants, this is the size of 262 // the constants. For strings, this is the size of a character. 263 uint64_t entsize_; 264 // Whether we keep input sections. 265 bool keeps_input_sections_; 266 // Object of the first merged input section. We use this for script 267 // processing. 268 Relobj* first_relobj_; 269 // Section index of the first merged input section. 270 unsigned int first_shndx_; 271 // Input sections. We only keep them is keeps_input_sections_ is true. 272 Input_sections input_sections_; 273 }; 274 275 // Handle SHF_MERGE sections with fixed-size constant data. 276 277 class Output_merge_data : public Output_merge_base 278 { 279 public: Output_merge_data(uint64_t entsize,uint64_t addralign)280 Output_merge_data(uint64_t entsize, uint64_t addralign) 281 : Output_merge_base(entsize, addralign), p_(NULL), len_(0), alc_(0), 282 input_count_(0), 283 hashtable_(128, Merge_data_hash(this), Merge_data_eq(this)) 284 { } 285 286 protected: 287 // Add an input section. 288 bool 289 do_add_input_section(Relobj* object, unsigned int shndx); 290 291 // Set the final data size. 292 void 293 set_final_data_size(); 294 295 // Write the data to the file. 296 void 297 do_write(Output_file*); 298 299 // Write the data to a buffer. 300 void 301 do_write_to_buffer(unsigned char*); 302 303 // Write to a map file. 304 void do_print_to_mapfile(Mapfile * mapfile)305 do_print_to_mapfile(Mapfile* mapfile) const 306 { mapfile->print_output_data(this, _("** merge constants")); } 307 308 // Print merge stats to stderr. 309 void 310 do_print_merge_stats(const char* section_name); 311 312 // Set keeps-input-sections flag. 313 void do_set_keeps_input_sections()314 do_set_keeps_input_sections() 315 { 316 gold_assert(this->input_count_ == 0); 317 Output_merge_base::do_set_keeps_input_sections(); 318 } 319 320 private: 321 // We build a hash table of the fixed-size constants. Each constant 322 // is stored as a pointer into the section data we are accumulating. 323 324 // A key in the hash table. This is an offset in the section 325 // contents we are building. 326 typedef section_offset_type Merge_data_key; 327 328 // Compute the hash code. To do this we need a pointer back to the 329 // object holding the data. 330 class Merge_data_hash 331 { 332 public: Merge_data_hash(const Output_merge_data * pomd)333 Merge_data_hash(const Output_merge_data* pomd) 334 : pomd_(pomd) 335 { } 336 337 size_t 338 operator()(Merge_data_key) const; 339 340 private: 341 const Output_merge_data* pomd_; 342 }; 343 344 friend class Merge_data_hash; 345 346 // Compare two entries in the hash table for equality. To do this 347 // we need a pointer back to the object holding the data. Note that 348 // we now have a pointer to the object stored in two places in the 349 // hash table. Fixing this would require specializing the hash 350 // table, which would be hard to do portably. 351 class Merge_data_eq 352 { 353 public: Merge_data_eq(const Output_merge_data * pomd)354 Merge_data_eq(const Output_merge_data* pomd) 355 : pomd_(pomd) 356 { } 357 358 bool 359 operator()(Merge_data_key k1, Merge_data_key k2) const; 360 361 private: 362 const Output_merge_data* pomd_; 363 }; 364 365 friend class Merge_data_eq; 366 367 // The type of the hash table. 368 typedef Unordered_set<Merge_data_key, Merge_data_hash, Merge_data_eq> 369 Merge_data_hashtable; 370 371 // Given a hash table key, which is just an offset into the section 372 // data, return a pointer to the corresponding constant. 373 const unsigned char* constant(Merge_data_key k)374 constant(Merge_data_key k) const 375 { 376 gold_assert(k >= 0 && k < static_cast<section_offset_type>(this->len_)); 377 return this->p_ + k; 378 } 379 380 // Add a constant to the output. 381 void 382 add_constant(const unsigned char*); 383 384 // The accumulated data. 385 unsigned char* p_; 386 // The length of the accumulated data. 387 section_size_type len_; 388 // The size of the allocated buffer. 389 section_size_type alc_; 390 // The number of entries seen in input files. 391 size_t input_count_; 392 // The hash table. 393 Merge_data_hashtable hashtable_; 394 }; 395 396 // Handle SHF_MERGE sections with string data. This is a template 397 // based on the type of the characters in the string. 398 399 template<typename Char_type> 400 class Output_merge_string : public Output_merge_base 401 { 402 public: Output_merge_string(uint64_t addralign)403 Output_merge_string(uint64_t addralign) 404 : Output_merge_base(sizeof(Char_type), addralign), stringpool_(addralign), 405 merged_strings_lists_(), input_count_(0), input_size_(0) 406 { 407 this->stringpool_.set_no_zero_null(); 408 } 409 410 protected: 411 // Add an input section. 412 bool 413 do_add_input_section(Relobj* object, unsigned int shndx); 414 415 // Do all the final processing after the input sections are read in. 416 // Returns the final data size. 417 section_size_type 418 finalize_merged_data(); 419 420 // Set the final data size. 421 void 422 set_final_data_size(); 423 424 // Write the data to the file. 425 void 426 do_write(Output_file*); 427 428 // Write the data to a buffer. 429 void 430 do_write_to_buffer(unsigned char*); 431 432 // Write to a map file. 433 void do_print_to_mapfile(Mapfile * mapfile)434 do_print_to_mapfile(Mapfile* mapfile) const 435 { mapfile->print_output_data(this, _("** merge strings")); } 436 437 // Print merge stats to stderr. 438 void 439 do_print_merge_stats(const char* section_name); 440 441 // Writes the stringpool to a buffer. 442 void stringpool_to_buffer(unsigned char * buffer,section_size_type buffer_size)443 stringpool_to_buffer(unsigned char* buffer, section_size_type buffer_size) 444 { this->stringpool_.write_to_buffer(buffer, buffer_size); } 445 446 // Clears all the data in the stringpool, to save on memory. 447 void clear_stringpool()448 clear_stringpool() 449 { this->stringpool_.clear(); } 450 451 // Whether this is a merge string section. 452 virtual bool do_is_string()453 do_is_string() 454 { return true; } 455 456 // Set keeps-input-sections flag. 457 void do_set_keeps_input_sections()458 do_set_keeps_input_sections() 459 { 460 gold_assert(this->input_count_ == 0); 461 Output_merge_base::do_set_keeps_input_sections(); 462 } 463 464 private: 465 // The name of the string type, for stats. 466 const char* 467 string_name(); 468 469 // As we see input sections, we build a mapping from object, section 470 // index and offset to strings. 471 struct Merged_string 472 { 473 // The offset in the input section. 474 section_offset_type offset; 475 // The key in the Stringpool. 476 Stringpool::Key stringpool_key; 477 Merged_stringMerged_string478 Merged_string(section_offset_type offseta, Stringpool::Key stringpool_keya) 479 : offset(offseta), stringpool_key(stringpool_keya) 480 { } 481 }; 482 483 typedef std::vector<Merged_string> Merged_strings; 484 485 struct Merged_strings_list 486 { 487 // The input object where the strings were found. 488 Relobj* object; 489 // The input section in the input object. 490 unsigned int shndx; 491 // The list of merged strings. 492 Merged_strings merged_strings; 493 Merged_strings_listMerged_strings_list494 Merged_strings_list(Relobj* objecta, unsigned int shndxa) 495 : object(objecta), shndx(shndxa), merged_strings() 496 { } 497 }; 498 499 typedef std::vector<Merged_strings_list*> Merged_strings_lists; 500 501 // As we see the strings, we add them to a Stringpool. 502 Stringpool_template<Char_type> stringpool_; 503 // Map from a location in an input object to an entry in the 504 // Stringpool. 505 Merged_strings_lists merged_strings_lists_; 506 // The number of entries seen in input files. 507 size_t input_count_; 508 // The total size of input sections. 509 size_t input_size_; 510 }; 511 512 } // End namespace gold. 513 514 #endif // !defined(GOLD_MERGE_H) 515