1 // fileread.h -- read files for gold   -*- C++ -*-
2 
3 // Copyright (C) 2006-2016 Free Software Foundation, Inc.
4 // Written by Ian Lance Taylor <iant@google.com>.
5 
6 // This file is part of gold.
7 
8 // This program is free software; you can redistribute it and/or modify
9 // it under the terms of the GNU General Public License as published by
10 // the Free Software Foundation; either version 3 of the License, or
11 // (at your option) any later version.
12 
13 // This program is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 // GNU General Public License for more details.
17 
18 // You should have received a copy of the GNU General Public License
19 // along with this program; if not, write to the Free Software
20 // Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
21 // MA 02110-1301, USA.
22 
23 // Classes used to read data from binary input files.
24 
25 #ifndef GOLD_FILEREAD_H
26 #define GOLD_FILEREAD_H
27 
28 #include <list>
29 #include <map>
30 #include <string>
31 #include <vector>
32 
33 #include "token.h"
34 
35 namespace gold
36 {
37 
// Since not all systems support stat.st_mtim and struct timespec,
// we define our own structure and fill in the nanoseconds if we can.
40 
struct Timespec
{
  // Seconds part of the timestamp.
  time_t seconds;
  // Nanoseconds part of the timestamp; stays zero unless a value is
  // explicitly supplied.
  int nanoseconds;

  // Build a timestamp with both fields set to zero.
  Timespec()
    : seconds(0),
      nanoseconds(0)
  { }

  // Build a timestamp from an explicit seconds/nanoseconds pair.
  Timespec(time_t a_seconds, int a_nanoseconds)
    : seconds(a_seconds),
      nanoseconds(a_nanoseconds)
  { }
};
54 
55 // Get the last modified time of an unopened file.  Returns false if the
56 // file does not exist.
57 
58 bool
59 get_mtime(const char* filename, Timespec* mtime);
60 
// Forward declarations for classes that this header only mentions
// through pointers or references.
class Dirsearch;
class File_view;
class Input_file_argument;
class Position_dependent_options;
65 
66 // File_read manages a file descriptor and mappings for a file we are
67 // reading.
68 
69 class File_read
70 {
71  public:
72   File_read()
73     : name_(), descriptor_(-1), is_descriptor_opened_(false), object_count_(0),
74       size_(0), token_(false), views_(), saved_views_(), mapped_bytes_(0),
75       released_(true), whole_file_view_(NULL)
76   { }
77 
78   ~File_read();
79 
80   // Open a file.
81   bool
82   open(const Task*, const std::string& name);
83 
84   // Pretend to open the file, but provide the file contents.  No
85   // actual file system activity will occur.  This is used for
86   // testing.
87   bool
88   open(const Task*, const std::string& name, const unsigned char* contents,
89        off_t size);
90 
91   // Return the file name.
92   const std::string&
93   filename() const
94   { return this->name_; }
95 
96   // Add an object associated with a file.
97   void
98   add_object()
99   { ++this->object_count_; }
100 
101   // Remove an object associated with a file.
102   void
103   remove_object()
104   { --this->object_count_; }
105 
106   // Lock the file for exclusive access within a particular Task::run
107   // execution.  This routine may only be called when the workqueue
108   // lock is held.
109   void
110   lock(const Task* t);
111 
112   // Unlock the file.
113   void
114   unlock(const Task* t);
115 
116   // Test whether the object is locked.
117   bool
118   is_locked() const;
119 
120   // Return the token, so that the task can be queued.
121   Task_token*
122   token()
123   { return &this->token_; }
124 
125   // Release the file.  This indicates that we aren't going to do
126   // anything further with it until it is unlocked.  This is used
127   // because a Task which locks the file never calls either lock or
128   // unlock; it just locks the token.  The basic rule is that a Task
129   // which locks a file via the Task::locks interface must explicitly
130   // call release() when it is done.  This is not necessary for code
131   // which calls unlock() on the file.
132   void
133   release();
134 
135   // Return the size of the file.
136   off_t
137   filesize() const
138   { return this->size_; }
139 
140   // Return a view into the file starting at file offset START for
141   // SIZE bytes.  OFFSET is the offset into the input file for the
142   // file we are reading; this is zero for a normal object file,
143   // non-zero for an object file in an archive.  ALIGNED is true if
144   // the data must be naturally aligned (i.e., aligned to the size
145   // of a target word); this only matters when OFFSET is not zero.
146   // The pointer will remain valid until the File_read is unlocked.
147   // It is an error if we can not read enough data from the file.
148   // The CACHE parameter is a hint as to whether it will be useful
149   // to cache this data for later accesses--i.e., later calls to
150   // get_view, read, or get_lasting_view which retrieve the same
151   // data.
152   const unsigned char*
153   get_view(off_t offset, off_t start, section_size_type size, bool aligned,
154 	   bool cache);
155 
156   // Read data from the file into the buffer P starting at file offset
157   // START for SIZE bytes.
158   void
159   read(off_t start, section_size_type size, void* p);
160 
161   // Return a lasting view into the file starting at file offset START
162   // for SIZE bytes.  This is allocated with new, and the caller is
163   // responsible for deleting it when done.  The data associated with
164   // this view will remain valid until the view is deleted.  It is an
165   // error if we can not read enough data from the file.  The OFFSET,
166   // ALIGNED and CACHE parameters are as in get_view.
167   File_view*
168   get_lasting_view(off_t offset, off_t start, section_size_type size,
169 		   bool aligned, bool cache);
170 
171   // Mark all views as no longer cached.
172   void
173   clear_view_cache_marks();
174 
175   // Discard all uncached views.  This is normally done by release(),
176   // but not for objects in archives.  FIXME: This is a complicated
177   // interface, and it would be nice to have something more automatic.
178   void
179   clear_uncached_views()
180   { this->clear_views(CLEAR_VIEWS_ARCHIVE); }
181 
182   // A struct used to do a multiple read.
183   struct Read_multiple_entry
184   {
185     // The file offset of the data to read.
186     off_t file_offset;
187     // The amount of data to read.
188     section_size_type size;
189     // The buffer where the data should be placed.
190     unsigned char* buffer;
191 
192     Read_multiple_entry(off_t o, section_size_type s, unsigned char* b)
193       : file_offset(o), size(s), buffer(b)
194     { }
195   };
196 
197   typedef std::vector<Read_multiple_entry> Read_multiple;
198 
199   // Read a bunch of data from the file into various different
200   // locations.  The vector must be sorted by ascending file_offset.
201   // BASE is a base offset to be added to all the offsets in the
202   // vector.
203   void
204   read_multiple(off_t base, const Read_multiple&);
205 
206   // Dump statistical information to stderr.
207   static void
208   print_stats();
209 
210   // Return the open file descriptor (for plugins).
211   int
212   descriptor()
213   {
214     this->reopen_descriptor();
215     return this->descriptor_;
216   }
217 
218   // Return the file last modification time.  Calls gold_fatal if the stat
219   // system call failed.
220   Timespec
221   get_mtime();
222 
223  private:
224   // Control for what views to clear.
225   enum Clear_views_mode
226   {
227     // Clear uncached views not used by an archive.
228     CLEAR_VIEWS_NORMAL,
229     // Clear all uncached views (including in an archive).
230     CLEAR_VIEWS_ARCHIVE,
231     // Clear all views (i.e., we're destroying the file).
232     CLEAR_VIEWS_ALL
233   };
234 
235   // This class may not be copied.
236   File_read(const File_read&);
237   File_read& operator=(const File_read&);
238 
239   // Total bytes mapped into memory during the link if --stats.
240   static unsigned long long total_mapped_bytes;
241 
242   // Current number of bytes mapped into memory during the link if
243   // --stats.
244   static unsigned long long current_mapped_bytes;
245 
246   // High water mark of bytes mapped into memory during the link if
247   // --stats.
248   static unsigned long long maximum_mapped_bytes;
249 
250   // A view into the file.
251   class View
252   {
253    public:
254     // Specifies how to dispose the data on destruction of the view.
255     enum Data_ownership
256     {
257       // Data owned by File object - nothing done in destructor.
258       DATA_NOT_OWNED,
259       // Data allocated with new[] and owned by this object - should
260       // use delete[].
261       DATA_ALLOCATED_ARRAY,
262       // Data mmapped and owned by this object - should munmap.
263       DATA_MMAPPED
264     };
265 
266     View(off_t start, section_size_type size, const unsigned char* data,
267 	 unsigned int byteshift, bool cache, Data_ownership data_ownership)
268       : start_(start), size_(size), data_(data), lock_count_(0),
269 	byteshift_(byteshift), cache_(cache), data_ownership_(data_ownership),
270 	accessed_(true)
271     { }
272 
273     ~View();
274 
275     off_t
276     start() const
277     { return this->start_; }
278 
279     section_size_type
280     size() const
281     { return this->size_; }
282 
283     const unsigned char*
284     data() const
285     { return this->data_; }
286 
287     void
288     lock();
289 
290     void
291     unlock();
292 
293     bool
294     is_locked();
295 
296     unsigned int
297     byteshift() const
298     { return this->byteshift_; }
299 
300     void
301     set_cache()
302     { this->cache_ = true; }
303 
304     void
305     clear_cache()
306     { this->cache_ = false; }
307 
308     bool
309     should_cache() const
310     { return this->cache_; }
311 
312     void
313     set_accessed()
314     { this->accessed_ = true; }
315 
316     void
317     clear_accessed()
318     { this->accessed_= false; }
319 
320     bool
321     accessed() const
322     { return this->accessed_; }
323 
324     // Returns TRUE if this view contains permanent data -- e.g., data that
325     // was supplied by the owner of the File object.
326     bool
327     is_permanent_view() const
328     { return this->data_ownership_ == DATA_NOT_OWNED; }
329 
330    private:
331     View(const View&);
332     View& operator=(const View&);
333 
334     // The file offset of the start of the view.
335     off_t start_;
336     // The size of the view.
337     section_size_type size_;
338     // A pointer to the actual bytes.
339     const unsigned char* data_;
340     // The number of locks on this view.
341     int lock_count_;
342     // The number of bytes that the view is shifted relative to the
343     // underlying file.  This is used to align data.  This is normally
344     // zero, except possibly for an object in an archive.
345     unsigned int byteshift_;
346     // Whether the view is cached.
347     bool cache_;
348     // Whether the view is mapped into memory.  If not, data_ points
349     // to memory allocated using new[].
350     Data_ownership data_ownership_;
351     // Whether the view has been accessed recently.
352     bool accessed_;
353   };
354 
355   friend class View;
356   friend class File_view;
357 
358   // The type of a mapping from page start and byte shift to views.
359   typedef std::map<std::pair<off_t, unsigned int>, View*> Views;
360 
361   // A simple list of Views.
362   typedef std::list<View*> Saved_views;
363 
364   // Open the descriptor if necessary.
365   void
366   reopen_descriptor();
367 
368   // Find a view into the file.
369   View*
370   find_view(off_t start, section_size_type size, unsigned int byteshift,
371 	    View** vshifted) const;
372 
373   // Read data from the file into a buffer.
374   void
375   do_read(off_t start, section_size_type size, void* p);
376 
377   // Add a view.
378   void
379   add_view(View*);
380 
381   // Make a view into the file.
382   View*
383   make_view(off_t start, section_size_type size, unsigned int byteshift,
384 	    bool cache);
385 
386   // Find or make a view into the file.
387   View*
388   find_or_make_view(off_t offset, off_t start, section_size_type size,
389 		    bool aligned, bool cache);
390 
391   // Clear the file views.
392   void
393   clear_views(Clear_views_mode);
394 
395   // The size of a file page for buffering data.
396   static const off_t page_size = 8192;
397 
398   // Given a file offset, return the page offset.
399   static off_t
400   page_offset(off_t file_offset)
401   { return file_offset & ~ (page_size - 1); }
402 
403   // Given a file size, return the size to read integral pages.
404   static off_t
405   pages(off_t file_size)
406   { return (file_size + (page_size - 1)) & ~ (page_size - 1); }
407 
408   // The maximum number of entries we will pass to ::readv.
409   static const size_t max_readv_entries = 128;
410 
411   // Use readv to read data.
412   void
413   do_readv(off_t base, const Read_multiple&, size_t start, size_t count);
414 
415   // File name.
416   std::string name_;
417   // File descriptor.
418   int descriptor_;
419   // Whether we have regained the descriptor after releasing the file.
420   bool is_descriptor_opened_;
421   // The number of objects associated with this file.  This will be
422   // more than 1 in the case of an archive.
423   int object_count_;
424   // File size.
425   off_t size_;
426   // A token used to lock the file.
427   Task_token token_;
428   // Buffered views into the file.
429   Views views_;
430   // List of views which were locked but had to be removed from views_
431   // because they were not large enough.
432   Saved_views saved_views_;
433   // Total amount of space mapped into memory.  This is only changed
434   // while the file is locked.  When we unlock the file, we transfer
435   // the total to total_mapped_bytes, and reset this to zero.
436   size_t mapped_bytes_;
437   // Whether the file was released.
438   bool released_;
439   // A view containing the whole file.  May be NULL if we mmap only
440   // the relevant parts of the file.  Not NULL if:
441   // - Flag --mmap_whole_files is set (default on 64-bit hosts).
442   // - The contents was specified in the constructor.  Used only for
443   //   testing purposes).
444   View* whole_file_view_;
445 };
446 
// A view of file data that persists even when the file is unlocked.
// Callers should destroy these when no longer required.  These are
// obtained from File_read::get_lasting_view.  They may only be
// destroyed when the underlying File_read is locked.
451 
452 class File_view
453 {
454  public:
455   // This may only be called when the underlying File_read is locked.
456   ~File_view();
457 
458   // Return a pointer to the data associated with this view.
459   const unsigned char*
460   data() const
461   { return this->data_; }
462 
463  private:
464   File_view(const File_view&);
465   File_view& operator=(const File_view&);
466 
467   friend class File_read;
468 
469   // Callers have to get these via File_read::get_lasting_view.
470   File_view(File_read& file, File_read::View* view, const unsigned char* data)
471     : file_(file), view_(view), data_(data)
472   { }
473 
474   File_read& file_;
475   File_read::View* view_;
476   const unsigned char* data_;
477 };
478 
479 // All the information we hold for a single input file.  This can be
480 // an object file, a shared library, or an archive.
481 
482 class Input_file
483 {
484  public:
485   enum Format
486   {
487     FORMAT_NONE,
488     FORMAT_ELF,
489     FORMAT_BINARY
490   };
491 
492   Input_file(const Input_file_argument* input_argument)
493     : input_argument_(input_argument), found_name_(), file_(),
494       is_in_sysroot_(false), format_(FORMAT_NONE)
495   { }
496 
497   // Create an input file given just a filename.
498   Input_file(const char* name);
499 
500   // Create an input file with the contents already provided.  This is
501   // only used for testing.  With this path, don't call the open
502   // method.
503   Input_file(const Task*, const char* name, const unsigned char* contents,
504 	     off_t size);
505 
506   // Return the command line argument.
507   const Input_file_argument*
508   input_file_argument() const
509   { return this->input_argument_; }
510 
511   // Return whether this is a file that we will search for in the list
512   // of directories.
513   bool
514   will_search_for() const;
515 
516   // Open the file.  If the open fails, this will report an error and
517   // return false.  If there is a search, it starts at directory
518   // *PINDEX.  *PINDEX should be initialized to zero.  It may be
519   // restarted to find the next file with a matching name by
520   // incrementing the result and calling this again.
521   bool
522   open(const Dirsearch&, const Task*, int* pindex);
523 
524   // Return the name given by the user.  For -lc this will return "c".
525   const char*
526   name() const;
527 
528   // Return the file name.  For -lc this will return something like
529   // "/usr/lib/libc.so".
530   const std::string&
531   filename() const
532   { return this->file_.filename(); }
533 
534   // Return the name under which we found the file, corresponding to
535   // the command line.  For -lc this will return something like
536   // "libc.so".
537   const std::string&
538   found_name() const
539   { return this->found_name_; }
540 
541   // Return the position dependent options.
542   const Position_dependent_options&
543   options() const;
544 
545   // Return the file.
546   File_read&
547   file()
548   { return this->file_; }
549 
550   const File_read&
551   file() const
552   { return this->file_; }
553 
554   // Whether we found the file in a directory in the system root.
555   bool
556   is_in_sysroot() const
557   { return this->is_in_sysroot_; }
558 
559   // Whether this file is in a system directory.
560   bool
561   is_in_system_directory() const;
562 
563   // Return whether this file is to be read only for its symbols.
564   bool
565   just_symbols() const;
566 
567   // Return the format of the unconverted input file.
568   Format
569   format() const
570   { return this->format_; }
571 
572   // Try to find a file in the extra search dirs.  Returns true on success.
573   static bool
574   try_extra_search_path(int* pindex,
575 			const Input_file_argument* input_argument,
576 			std::string filename, std::string* found_name,
577 			std::string* namep);
578 
579   // Find the actual file.
580   static bool
581   find_file(const Dirsearch& dirpath, int* pindex,
582 	    const Input_file_argument* input_argument,
583 	    bool* is_in_sysroot,
584 	    std::string* found_name, std::string* namep);
585 
586  private:
587   Input_file(const Input_file&);
588   Input_file& operator=(const Input_file&);
589 
590   // Open a binary file.
591   bool
592   open_binary(const Task* task, const std::string& name);
593 
594   // The argument from the command line.
595   const Input_file_argument* input_argument_;
596   // The name under which we opened the file.  This is like the name
597   // on the command line, but -lc turns into libc.so (or whatever).
598   // It only includes the full path if the path was on the command
599   // line.
600   std::string found_name_;
601   // The file after we open it.
602   File_read file_;
603   // Whether we found the file in a directory in the system root.
604   bool is_in_sysroot_;
605   // Format of unconverted input file.
606   Format format_;
607 };
608 
609 } // end namespace gold
610 
611 #endif // !defined(GOLD_FILEREAD_H)
612