// fileread.h -- read files for gold   -*- C++ -*-

// Copyright (C) 2006-2020 Free Software Foundation, Inc.
// Written by Ian Lance Taylor <iant@google.com>.

// This file is part of gold.

// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 3 of the License, or
// (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
// MA 02110-1301, USA.

// Classes used to read data from binary input files.
24 
25 #ifndef GOLD_FILEREAD_H
26 #define GOLD_FILEREAD_H
27 
28 #include <list>
29 #include <map>
30 #include <string>
31 #include <vector>
32 
33 #include "token.h"
34 
35 namespace gold
36 {
37 
// Since not all systems support stat.st_mtim and struct timespec,
// we define our own structure and fill in the nanoseconds if we can.

struct Timespec
{
  // Construct a zero timestamp: 0 seconds, 0 nanoseconds.
  Timespec()
    : seconds(0), nanoseconds(0)
  { }

  // Construct a timestamp from explicit second and nanosecond parts.
  Timespec(time_t a_seconds, int a_nanoseconds)
    : seconds(a_seconds), nanoseconds(a_nanoseconds)
  { }

  // Whole-second part of the timestamp.
  time_t seconds;
  // Nanosecond part of the timestamp; zero unless explicitly supplied.
  int nanoseconds;
};
54 
55 // Get the last modified time of an unopened file.  Returns false if the
56 // file does not exist.
57 
58 bool
59 get_mtime(const char* filename, Timespec* mtime);
60 
// Forward declarations of types that this header only uses through
// pointers and references.
class Position_dependent_options;
class Input_file_argument;
class Dirsearch;
class File_view;
65 
66 // File_read manages a file descriptor and mappings for a file we are
67 // reading.
68 
69 class File_read
70 {
71  public:
File_read()72   File_read()
73     : name_(), descriptor_(-1), is_descriptor_opened_(false), object_count_(0),
74       size_(0), token_(false), views_(), saved_views_(), mapped_bytes_(0),
75       released_(true), whole_file_view_(NULL)
76   { }
77 
78   ~File_read();
79 
80   // Open a file.
81   bool
82   open(const Task*, const std::string& name);
83 
84   // Pretend to open the file, but provide the file contents.  No
85   // actual file system activity will occur.  This is used for
86   // testing.
87   bool
88   open(const Task*, const std::string& name, const unsigned char* contents,
89        off_t size);
90 
91   // Return the file name.
92   const std::string&
filename()93   filename() const
94   { return this->name_; }
95 
96   // Add an object associated with a file.
97   void
add_object()98   add_object()
99   { ++this->object_count_; }
100 
101   // Remove an object associated with a file.
102   void
remove_object()103   remove_object()
104   { --this->object_count_; }
105 
106   // Lock the file for exclusive access within a particular Task::run
107   // execution.  This routine may only be called when the workqueue
108   // lock is held.
109   void
110   lock(const Task* t);
111 
112   // Unlock the file.
113   void
114   unlock(const Task* t);
115 
116   // Test whether the object is locked.
117   bool
118   is_locked() const;
119 
120   // Return the token, so that the task can be queued.
121   Task_token*
token()122   token()
123   { return &this->token_; }
124 
125   // Release the file.  This indicates that we aren't going to do
126   // anything further with it until it is unlocked.  This is used
127   // because a Task which locks the file never calls either lock or
128   // unlock; it just locks the token.  The basic rule is that a Task
129   // which locks a file via the Task::locks interface must explicitly
130   // call release() when it is done.  This is not necessary for code
131   // which calls unlock() on the file.
132   void
133   release();
134 
135   // Return the size of the file.
136   off_t
filesize()137   filesize() const
138   { return this->size_; }
139 
140   // Return a view into the file starting at file offset START for
141   // SIZE bytes.  OFFSET is the offset into the input file for the
142   // file we are reading; this is zero for a normal object file,
143   // non-zero for an object file in an archive.  ALIGNED is true if
144   // the data must be naturally aligned (i.e., aligned to the size
145   // of a target word); this only matters when OFFSET is not zero.
146   // The pointer will remain valid until the File_read is unlocked.
147   // It is an error if we can not read enough data from the file.
148   // The CACHE parameter is a hint as to whether it will be useful
149   // to cache this data for later accesses--i.e., later calls to
150   // get_view, read, or get_lasting_view which retrieve the same
151   // data.
152   const unsigned char*
153   get_view(off_t offset, off_t start, section_size_type size, bool aligned,
154 	   bool cache);
155 
156   // Read data from the file into the buffer P starting at file offset
157   // START for SIZE bytes.
158   void
159   read(off_t start, section_size_type size, void* p);
160 
161   // Return a lasting view into the file starting at file offset START
162   // for SIZE bytes.  This is allocated with new, and the caller is
163   // responsible for deleting it when done.  The data associated with
164   // this view will remain valid until the view is deleted.  It is an
165   // error if we can not read enough data from the file.  The OFFSET,
166   // ALIGNED and CACHE parameters are as in get_view.
167   File_view*
168   get_lasting_view(off_t offset, off_t start, section_size_type size,
169 		   bool aligned, bool cache);
170 
171   // Mark all views as no longer cached.
172   void
173   clear_view_cache_marks();
174 
175   // Discard all uncached views.  This is normally done by release(),
176   // but not for objects in archives.  FIXME: This is a complicated
177   // interface, and it would be nice to have something more automatic.
178   void
clear_uncached_views()179   clear_uncached_views()
180   { this->clear_views(CLEAR_VIEWS_ARCHIVE); }
181 
182   // A struct used to do a multiple read.
183   struct Read_multiple_entry
184   {
185     // The file offset of the data to read.
186     off_t file_offset;
187     // The amount of data to read.
188     section_size_type size;
189     // The buffer where the data should be placed.
190     unsigned char* buffer;
191 
Read_multiple_entryRead_multiple_entry192     Read_multiple_entry(off_t o, section_size_type s, unsigned char* b)
193       : file_offset(o), size(s), buffer(b)
194     { }
195   };
196 
197   typedef std::vector<Read_multiple_entry> Read_multiple;
198 
199   // Read a bunch of data from the file into various different
200   // locations.  The vector must be sorted by ascending file_offset.
201   // BASE is a base offset to be added to all the offsets in the
202   // vector.
203   void
204   read_multiple(off_t base, const Read_multiple&);
205 
206   // Dump statistical information to stderr.
207   static void
208   print_stats();
209 
210   // Write the dependency file listing all files read.
211   static void
212   write_dependency_file(const char* dependency_file_name,
213 			const char* output_file_name);
214 
215   // Record that a file was read.  File_read::open does this.
216   static void
217   record_file_read(const std::string& name);
218 
219   // Return the open file descriptor (for plugins).
220   int
descriptor()221   descriptor()
222   {
223     this->reopen_descriptor();
224     return this->descriptor_;
225   }
226 
227   // Return the file last modification time.  Calls gold_fatal if the stat
228   // system call failed.
229   Timespec
230   get_mtime();
231 
232  private:
233   // Control for what views to clear.
234   enum Clear_views_mode
235   {
236     // Clear uncached views not used by an archive.
237     CLEAR_VIEWS_NORMAL,
238     // Clear all uncached views (including in an archive).
239     CLEAR_VIEWS_ARCHIVE,
240     // Clear all views (i.e., we're destroying the file).
241     CLEAR_VIEWS_ALL
242   };
243 
244   // This class may not be copied.
245   File_read(const File_read&);
246   File_read& operator=(const File_read&);
247 
248   // Total bytes mapped into memory during the link if --stats.
249   static unsigned long long total_mapped_bytes;
250 
251   // Current number of bytes mapped into memory during the link if
252   // --stats.
253   static unsigned long long current_mapped_bytes;
254 
255   // High water mark of bytes mapped into memory during the link if
256   // --stats.
257   static unsigned long long maximum_mapped_bytes;
258 
259   // Set of names of all files read.
260   static std::vector<std::string> files_read;
261 
262   // A view into the file.
263   class View
264   {
265    public:
266     // Specifies how to dispose the data on destruction of the view.
267     enum Data_ownership
268     {
269       // Data owned by File object - nothing done in destructor.
270       DATA_NOT_OWNED,
271       // Data allocated with new[] and owned by this object - should
272       // use delete[].
273       DATA_ALLOCATED_ARRAY,
274       // Data mmapped and owned by this object - should munmap.
275       DATA_MMAPPED
276     };
277 
View(off_t start,section_size_type size,const unsigned char * data,unsigned int byteshift,bool cache,Data_ownership data_ownership)278     View(off_t start, section_size_type size, const unsigned char* data,
279 	 unsigned int byteshift, bool cache, Data_ownership data_ownership)
280       : start_(start), size_(size), data_(data), lock_count_(0),
281 	byteshift_(byteshift), cache_(cache), data_ownership_(data_ownership),
282 	accessed_(true)
283     { }
284 
285     ~View();
286 
287     off_t
start()288     start() const
289     { return this->start_; }
290 
291     section_size_type
size()292     size() const
293     { return this->size_; }
294 
295     const unsigned char*
data()296     data() const
297     { return this->data_; }
298 
299     void
300     lock();
301 
302     void
303     unlock();
304 
305     bool
306     is_locked();
307 
308     unsigned int
byteshift()309     byteshift() const
310     { return this->byteshift_; }
311 
312     void
set_cache()313     set_cache()
314     { this->cache_ = true; }
315 
316     void
clear_cache()317     clear_cache()
318     { this->cache_ = false; }
319 
320     bool
should_cache()321     should_cache() const
322     { return this->cache_; }
323 
324     void
set_accessed()325     set_accessed()
326     { this->accessed_ = true; }
327 
328     void
clear_accessed()329     clear_accessed()
330     { this->accessed_= false; }
331 
332     bool
accessed()333     accessed() const
334     { return this->accessed_; }
335 
336     // Returns TRUE if this view contains permanent data -- e.g., data that
337     // was supplied by the owner of the File object.
338     bool
is_permanent_view()339     is_permanent_view() const
340     { return this->data_ownership_ == DATA_NOT_OWNED; }
341 
342    private:
343     View(const View&);
344     View& operator=(const View&);
345 
346     // The file offset of the start of the view.
347     off_t start_;
348     // The size of the view.
349     section_size_type size_;
350     // A pointer to the actual bytes.
351     const unsigned char* data_;
352     // The number of locks on this view.
353     int lock_count_;
354     // The number of bytes that the view is shifted relative to the
355     // underlying file.  This is used to align data.  This is normally
356     // zero, except possibly for an object in an archive.
357     unsigned int byteshift_;
358     // Whether the view is cached.
359     bool cache_;
360     // Whether the view is mapped into memory.  If not, data_ points
361     // to memory allocated using new[].
362     Data_ownership data_ownership_;
363     // Whether the view has been accessed recently.
364     bool accessed_;
365   };
366 
367   friend class View;
368   friend class File_view;
369 
370   // The type of a mapping from page start and byte shift to views.
371   typedef std::map<std::pair<off_t, unsigned int>, View*> Views;
372 
373   // A simple list of Views.
374   typedef std::list<View*> Saved_views;
375 
376   // Open the descriptor if necessary.
377   void
378   reopen_descriptor();
379 
380   // Find a view into the file.
381   View*
382   find_view(off_t start, section_size_type size, unsigned int byteshift,
383 	    View** vshifted) const;
384 
385   // Read data from the file into a buffer.
386   void
387   do_read(off_t start, section_size_type size, void* p);
388 
389   // Add a view.
390   void
391   add_view(View*);
392 
393   // Make a view into the file.
394   View*
395   make_view(off_t start, section_size_type size, unsigned int byteshift,
396 	    bool cache);
397 
398   // Find or make a view into the file.
399   View*
400   find_or_make_view(off_t offset, off_t start, section_size_type size,
401 		    bool aligned, bool cache);
402 
403   // Clear the file views.
404   void
405   clear_views(Clear_views_mode);
406 
407   // The size of a file page for buffering data.
408   static const off_t page_size = 8192;
409 
410   // Given a file offset, return the page offset.
411   static off_t
page_offset(off_t file_offset)412   page_offset(off_t file_offset)
413   { return file_offset & ~ (page_size - 1); }
414 
415   // Given a file size, return the size to read integral pages.
416   static off_t
pages(off_t file_size)417   pages(off_t file_size)
418   { return (file_size + (page_size - 1)) & ~ (page_size - 1); }
419 
420   // The maximum number of entries we will pass to ::readv.
421   static const size_t max_readv_entries = 128;
422 
423   // Use readv to read data.
424   void
425   do_readv(off_t base, const Read_multiple&, size_t start, size_t count);
426 
427   // File name.
428   std::string name_;
429   // File descriptor.
430   int descriptor_;
431   // Whether we have regained the descriptor after releasing the file.
432   bool is_descriptor_opened_;
433   // The number of objects associated with this file.  This will be
434   // more than 1 in the case of an archive.
435   int object_count_;
436   // File size.
437   off_t size_;
438   // A token used to lock the file.
439   Task_token token_;
440   // Buffered views into the file.
441   Views views_;
442   // List of views which were locked but had to be removed from views_
443   // because they were not large enough.
444   Saved_views saved_views_;
445   // Total amount of space mapped into memory.  This is only changed
446   // while the file is locked.  When we unlock the file, we transfer
447   // the total to total_mapped_bytes, and reset this to zero.
448   size_t mapped_bytes_;
449   // Whether the file was released.
450   bool released_;
451   // A view containing the whole file.  May be NULL if we mmap only
452   // the relevant parts of the file.  Not NULL if:
453   // - Flag --mmap_whole_files is set (default on 64-bit hosts).
454   // - The contents was specified in the constructor.  Used only for
455   //   testing purposes).
456   View* whole_file_view_;
457 };
458 
459 // A view of file data that persists even when the file is unlocked.
460 // Callers should destroy these when no longer required.  These are
461 // obtained form File_read::get_lasting_view.  They may only be
462 // destroyed when the underlying File_read is locked.
463 
464 class File_view
465 {
466  public:
467   // This may only be called when the underlying File_read is locked.
468   ~File_view();
469 
470   // Return a pointer to the data associated with this view.
471   const unsigned char*
data()472   data() const
473   { return this->data_; }
474 
475  private:
476   File_view(const File_view&);
477   File_view& operator=(const File_view&);
478 
479   friend class File_read;
480 
481   // Callers have to get these via File_read::get_lasting_view.
File_view(File_read & file,File_read::View * view,const unsigned char * data)482   File_view(File_read& file, File_read::View* view, const unsigned char* data)
483     : file_(file), view_(view), data_(data)
484   { }
485 
486   File_read& file_;
487   File_read::View* view_;
488   const unsigned char* data_;
489 };
490 
491 // All the information we hold for a single input file.  This can be
492 // an object file, a shared library, or an archive.
493 
494 class Input_file
495 {
496  public:
497   enum Format
498   {
499     FORMAT_NONE,
500     FORMAT_ELF,
501     FORMAT_BINARY
502   };
503 
Input_file(const Input_file_argument * input_argument)504   Input_file(const Input_file_argument* input_argument)
505     : input_argument_(input_argument), found_name_(), file_(),
506       is_in_sysroot_(false), format_(FORMAT_NONE)
507   { }
508 
509   // Create an input file given just a filename.
510   Input_file(const char* name);
511 
512   // Create an input file with the contents already provided.  This is
513   // only used for testing.  With this path, don't call the open
514   // method.
515   Input_file(const Task*, const char* name, const unsigned char* contents,
516 	     off_t size);
517 
518   // Return the command line argument.
519   const Input_file_argument*
input_file_argument()520   input_file_argument() const
521   { return this->input_argument_; }
522 
523   // Return whether this is a file that we will search for in the list
524   // of directories.
525   bool
526   will_search_for() const;
527 
528   // Open the file.  If the open fails, this will report an error and
529   // return false.  If there is a search, it starts at directory
530   // *PINDEX.  *PINDEX should be initialized to zero.  It may be
531   // restarted to find the next file with a matching name by
532   // incrementing the result and calling this again.
533   bool
534   open(const Dirsearch&, const Task*, int* pindex);
535 
536   // Return the name given by the user.  For -lc this will return "c".
537   const char*
538   name() const;
539 
540   // Return the file name.  For -lc this will return something like
541   // "/usr/lib/libc.so".
542   const std::string&
filename()543   filename() const
544   { return this->file_.filename(); }
545 
546   // Return the name under which we found the file, corresponding to
547   // the command line.  For -lc this will return something like
548   // "libc.so".
549   const std::string&
found_name()550   found_name() const
551   { return this->found_name_; }
552 
553   // Return the position dependent options.
554   const Position_dependent_options&
555   options() const;
556 
557   // Return the file.
558   File_read&
file()559   file()
560   { return this->file_; }
561 
562   const File_read&
file()563   file() const
564   { return this->file_; }
565 
566   // Whether we found the file in a directory in the system root.
567   bool
is_in_sysroot()568   is_in_sysroot() const
569   { return this->is_in_sysroot_; }
570 
571   // Whether this file is in a system directory.
572   bool
573   is_in_system_directory() const;
574 
575   // Return whether this file is to be read only for its symbols.
576   bool
577   just_symbols() const;
578 
579   // Return the format of the unconverted input file.
580   Format
format()581   format() const
582   { return this->format_; }
583 
584   // Try to find a file in the extra search dirs.  Returns true on success.
585   static bool
586   try_extra_search_path(int* pindex,
587 			const Input_file_argument* input_argument,
588 			std::string filename, std::string* found_name,
589 			std::string* namep);
590 
591   // Find the actual file.
592   static bool
593   find_file(const Dirsearch& dirpath, int* pindex,
594 	    const Input_file_argument* input_argument,
595 	    bool* is_in_sysroot,
596 	    std::string* found_name, std::string* namep);
597 
598  private:
599   Input_file(const Input_file&);
600   Input_file& operator=(const Input_file&);
601 
602   // Open a binary file.
603   bool
604   open_binary(const Task* task, const std::string& name);
605 
606   // The argument from the command line.
607   const Input_file_argument* input_argument_;
608   // The name under which we opened the file.  This is like the name
609   // on the command line, but -lc turns into libc.so (or whatever).
610   // It only includes the full path if the path was on the command
611   // line.
612   std::string found_name_;
613   // The file after we open it.
614   File_read file_;
615   // Whether we found the file in a directory in the system root.
616   bool is_in_sysroot_;
617   // Format of unconverted input file.
618   Format format_;
619 };
620 
621 } // end namespace gold
622 
623 #endif // !defined(GOLD_FILEREAD_H)
624