1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 2000
4  *
5  * RTS Object Linker
6  *
7  * ---------------------------------------------------------------------------*/
8 
9 #pragma once
10 
11 #include "Rts.h"
12 #include "Hash.h"
13 #include "linker/M32Alloc.h"
14 
15 #if RTS_LINKER_USE_MMAP
16 #include <sys/mman.h>
17 #endif
18 
19 #include "BeginPrivate.h"
20 
/* Address of a symbol.  Deliberately untyped, so it is only ever handled
 * through a pointer (SymbolAddr *). */
typedef void SymbolAddr;

/* Character type of a symbol name; names are handled as SymbolName *. */
typedef char SymbolName;

/* Pairs a symbol's name with its address.  Holds the extended information
 * about a symbol in case it has to be resolved at a late stage. */
typedef struct _Symbol {
    SymbolName *name;
    SymbolAddr *addr;
} Symbol_t;
31 
/* Classification of the sections of a loaded object.  The GC needs this to
 * decide whether or not a pointer found on the stack is a code pointer.
 * See Note [BFD import library].
 */
typedef enum {
    /* Code or read-only data, e.g. .text or .r(o)data. */
    SECTIONKIND_CODE_OR_RODATA,

    /* Read/write data, e.g. .data. */
    SECTIONKIND_RWDATA,

    /* Static initializers, e.g. .ctors. */
    SECTIONKIND_INIT_ARRAY,

    /* Static finalizers, e.g. .dtors. */
    SECTIONKIND_FINIT_ARRAY,

    /* Unknown section that we do not care about. */
    SECTIONKIND_OTHER,

    /* Debug information, e.g. .debug$. */
    SECTIONKIND_DEBUG,

    /* Member of an import section group, e.g. .idata$. */
    SECTIONKIND_IMPORT,

    /* Definition of an import library entry, e.g. idata$7. */
    SECTIONKIND_IMPORT_LIBRARY,

    SECTIONKIND_NOINFOAVAIL
} SectionKind;
57 
/* Allocation class recorded for each Section (its 'alloc' field).
 * NOTE(review): going by the constant names these stand for "no memory",
 * the m32 allocator, mmap() and malloc() respectively -- confirm against
 * the code that fills in sections.
 */
typedef enum {
    SECTION_NOMEM,
    SECTION_M32,
    SECTION_MMAP,
    SECTION_MALLOC
} SectionAlloc;
65 
/* Desired memory protection for the pages of a segment.  An enum is used
 * because it is more explicit and looks nicer in a debugger.
 *
 * When RTS_LINKER_USE_MMAP is set, each constant equals the matching
 * combination of PROT_* flags, so it can be used directly wherever a POSIX
 * system expects such flags.  Otherwise the constants are plain consecutive
 * enumerators.
 */
typedef enum {
#if RTS_LINKER_USE_MMAP
    SEGMENT_PROT_RO  = PROT_READ,
    SEGMENT_PROT_RX  = PROT_READ | PROT_EXEC,
    SEGMENT_PROT_RWO = PROT_READ | PROT_WRITE,
    SEGMENT_PROT_RWX = PROT_READ | PROT_WRITE | PROT_EXEC
#else
    SEGMENT_PROT_RO,
    SEGMENT_PROT_RX,
    SEGMENT_PROT_RWO,
    SEGMENT_PROT_RWX
#endif
} SegmentProt;
85 
86 /*
87  * Note [No typedefs for customizable types]
88  * Some pointer-to-struct types are defined opaquely
89  * first, and customized later to architecture/ABI-specific
90  * instantiations. Having the usual
91  *   typedef struct _Foo {...} Foo;
92  * wrappers is hard to get right with older versions of GCC,
93  * so just have a
94  *   struct Foo {...};
95  * and always refer to it with the 'struct' qualifier.
96  */
97 
98 typedef
99    struct _Section {
100       void*    start;              /* actual start of section in memory */
101       StgWord  size;               /* actual size of section in memory */
102       SectionKind kind;
103       SectionAlloc alloc;
104 
105       /*
106        * The following fields are relevant for SECTION_MMAP sections only
107        */
108       StgWord mapped_offset;      /* offset from the image of mapped_start */
109       void* mapped_start;         /* start of mmap() block */
110       StgWord mapped_size;        /* size of mmap() block */
111 
112       /* A customizable type to augment the Section type.
113        * See Note [No typedefs for customizable types]
114        */
115       struct SectionFormatInfo* info;
116    }
117    Section;
118 
/* One node of a singly linked list of memory regions, each described by its
 * start address and its size. */
typedef struct _ProddableBlock {
    void *start;
    int size;
    struct _ProddableBlock *next;
} ProddableBlock;
126 
127 typedef struct _Segment {
128     void *start;                /* page aligned start address of a segment */
129     size_t size;                /* page rounded size of a segment */
130     SegmentProt prot;           /* mem protection to set after all symbols were
131                                  * resolved */
132 
133     int *sections_idx;          /* an array of section indexes assigned to this segment */
134     int n_sections;
135 } Segment;
136 
/* Architectures for which ObjectCode carries a table of SymbolExtra entries. */
#if defined(powerpc_HOST_ARCH) \
 || defined(x86_64_HOST_ARCH)  \
 || defined(arm_HOST_ARCH)     \
 || defined(aarch64_HOST_ARCH)
#  define NEED_SYMBOL_EXTRAS 1
#endif

/*
 * The m32 allocator is used for symbol extras on Windows and on other
 * mmap-using platforms.
 */
#if RTS_LINKER_USE_MMAP
#  define NEED_M32 1
#endif
148 
/* Jump islands are snippets of machine code required for relative address
 * relocations on the PowerPC, x86_64 and ARM.  The layout of an entry is
 * architecture specific; on any other architecture no members are declared.
 */
typedef struct {
#if defined(powerpc_HOST_ARCH)
    struct {
        short lis_r12;
        short hi_addr;
        short ori_r12_r12;
        short lo_addr;
        long  mtctr_r12;
        long  bctr;
    } jumpIsland;
#elif defined(x86_64_HOST_ARCH)
    uint64_t addr;
    uint8_t  jumpIsland[6];
#elif defined(arm_HOST_ARCH)
    uint8_t  jumpIsland[16];
#endif
} SymbolExtra;
167 
168 
169 /* Top-level structure for an object module.  One of these is allocated
170  * for each object file in use.
171  */
172 typedef struct _ObjectCode {
173     OStatus    status;
174     pathchar  *fileName;
175     int        fileSize;     /* also mapped image size when using mmap() */
176     char*      formatName;            /* eg "ELF32", "DLL", "COFF", etc. */
177 
178     /* If this object is a member of an archive, archiveMemberName is
179      * like "libarchive.a(object.o)". Otherwise it's NULL.
180      */
181     char*      archiveMemberName;
182 
183     /* An array containing ptrs to all the symbol names copied from
184        this object into the global symbol hash table.  This is so that
185        we know which parts of the latter mapping to nuke when this
186        object is removed from the system. */
187     Symbol_t *symbols;
188     int    n_symbols;
189 
190     /* ptr to mem containing the object file image */
191     char*      image;
192 
193     /* A customizable type, that formats can use to augment ObjectCode
194      * See Note [No typedefs for customizable types]
195      */
196     struct ObjectCodeFormatInfo* info;
197 
198     /* non-zero if the object file was mmap'd, otherwise malloc'd */
199     int        imageMapped;
200 
201     /* record by how much image has been deliberately misaligned
202        after allocation, so that we can use realloc */
203     int        misalignment;
204 
205     /* The section-kind entries for this object module. An array. */
206     int n_sections;
207     Section* sections;
208 
209     int n_segments;
210     Segment *segments;
211 
212     //
213     // Garbage collection fields
214     //
215 
216     // Next object in `objects` list
217     struct _ObjectCode *next;
218 
219     // Previous object in `objects` list
220     struct _ObjectCode *prev;
221 
222     // Next object in `loaded_objects` list
223     struct _ObjectCode *next_loaded_object;
224 
225     // Mark bit
226     StgWord mark;
227 
228     // Set of dependencies (ObjectCode*) of the object file. Traverse
229     // dependencies using `iterHashTable`.
230     //
231     // New entries are added as we resolve symbols in an object file, in
232     // `lookupDependentSymbol`. When an object file uses multiple symbols from
233     // another object file we add the dependent multiple times, so we use a
234     // `HashTable` here rather than a list/array to avoid copies.
235     //
236     // Used when unloading object files. See Note [Object unloading] in
237     // CheckUnload.c.
238     HashSet *dependencies;
239 
240     //
241     // End of garbage collection fields
242     //
243 
244     /* SANITY CHECK ONLY: a list of the only memory regions which may
245        safely be prodded during relocation.  Any attempt to prod
246        outside one of these is an error in the linker. */
247     ProddableBlock* proddables;
248 
249 #if defined(ia64_HOST_ARCH)
250     /* Procedure Linkage Table for this object */
251     void *plt;
252     unsigned int pltIndex;
253 #endif
254 
255 #if defined(NEED_SYMBOL_EXTRAS)
256     SymbolExtra    *symbol_extras;
257     unsigned long   first_symbol_extra;
258     unsigned long   n_symbol_extras;
259 #endif
260     /* Additional memory that is preallocated and contiguous with image
261        which can be used used to relocate bss sections. */
262     char* bssBegin;
263     char* bssEnd;
264 
265     /* a list of all ForeignExportsLists owned by this object */
266     struct ForeignExportsList *foreign_exports;
267 
268     /* Holds the list of symbols in the .o file which
269        require extra information.*/
270     HashTable *extraInfos;
271 
272 #if defined(NEED_M32)
273     /* The m32 allocators used for allocating small sections and symbol extras
274      * during loading. We have two: one for (writeable) data and one for
275      * (read-only/executable) code. */
276     m32_allocator *rw_m32, *rx_m32;
277 #endif
278 } ObjectCode;
279 
/* Yields (OC)->archiveMemberName when that pointer is non-NULL and
 * (OC)->fileName otherwise.  OC is expanded more than once, so it should be
 * free of side effects. */
#define OC_INFORMATIVE_FILENAME(OC) \
    ( (OC)->archiveMemberName ? (OC)->archiveMemberName : (OC)->fileName )
285 
/* The linker's mutex; declared only for THREADED_RTS builds and defined
 * outside this header. */
#if defined(THREADED_RTS)
extern Mutex linker_mutex;
#endif

/* Type of the initializer: called with an argument count, an argument vector
 * and an environment vector, returning nothing. */
typedef void (*init_t)(int argc, char **argv, char **env);
292 
293 /* SymbolInfo tracks a symbol's address, the object code from which
294    it originated, and whether or not it's weak.
295 
296    RtsSymbolInfo is used to track the state of the symbols currently
297    loaded or to be loaded by the Linker.
298 
299    Where the information in the `ObjectCode` is used to track the
300    original status of the symbol inside the `ObjectCode`.
301 
302    A weak symbol that has been used will still be marked as weak
303    in the `ObjectCode` but in the `RtsSymbolInfo` it won't be.
304 */
305 typedef struct _RtsSymbolInfo {
306     SymbolAddr* value;
307     ObjectCode *owner;
308     HsBool weak;
309 } RtsSymbolInfo;
310 
311 void exitLinker( void );
312 
313 void freeObjectCode (ObjectCode *oc);
314 SymbolAddr* loadSymbol(SymbolName *lbl, RtsSymbolInfo *pinfo);
315 
/* Memory-mapping helpers of the linker (declarations only).
 * NOTE(review): 'prot' and 'flags' presumably carry mmap()-style protection
 * and mapping flags -- confirm against the definition. */
void *mmapAnonForLinker(size_t bytes);
void *mmapForLinker(size_t bytes, uint32_t prot, uint32_t flags,
                    int fd, int offset);
void  mmapForLinkerMarkExecutable(void *start, size_t len);
319 
320 void addProddableBlock ( ObjectCode* oc, void* start, int size );
321 void checkProddableBlock (ObjectCode *oc, void *addr, size_t size );
322 void freeProddableBlocks (ObjectCode *oc);
323 
324 void addSection (Section *s, SectionKind kind, SectionAlloc alloc,
325                  void* start, StgWord size, StgWord mapped_offset,
326                  void* mapped_start, StgWord mapped_size);
327 
328 HsBool ghciLookupSymbolInfo(HashTable *table,
329                             const SymbolName* key, RtsSymbolInfo **result);
330 
331 int ghciInsertSymbolTable(
332     pathchar* obj_name,
333     HashTable *table,
334     const SymbolName* key,
335     SymbolAddr* data,
336     HsBool weak,
337     ObjectCode *owner);
338 
339 /* Lock-free version of lookupSymbol. When 'dependent' is not NULL, adds it as a
340  * dependent to the owner of the symbol. */
341 SymbolAddr* lookupDependentSymbol (SymbolName* lbl, ObjectCode *dependent);
342 
343 extern /*Str*/HashTable *symhash;
344 
345 pathchar*
346 resolveSymbolAddr (pathchar* buffer, int size,
347                    SymbolAddr* symbol, uintptr_t* top);
348 
349 /*************************************************
350  * Various bits of configuration
351  *************************************************/
352 
/* PowerPC and ARM have relative branch instructions with only 24 bit
 * displacements, so they need jump islands contiguous with each object
 * code module.
 */
#if defined(powerpc_HOST_ARCH) || defined(arm_HOST_ARCH)
#  define SHORT_REL_BRANCH 1
#endif

/* USE_CONTIGUOUS_MMAP is always defined: 1 for the mmap-based linker on a
 * Linux host with short relative branches, 0 in every other configuration.
 */
#if RTS_LINKER_USE_MMAP && defined(SHORT_REL_BRANCH) && defined(linux_HOST_OS)
#  define USE_CONTIGUOUS_MMAP 1
#else
#  define USE_CONTIGUOUS_MMAP 0
#endif
369 
370 HsInt isAlreadyLoaded( pathchar *path );
371 HsInt loadOc( ObjectCode* oc );
372 ObjectCode* mkOc( pathchar *path, char *image, int imageSize,
373                   bool mapped, char *archiveMemberName,
374                   int misalignment
375                   );
376 
377 void initSegment(Segment *s, void *start, size_t size, SegmentProt prot, int n_sections);
378 void freeSegments(ObjectCode *oc);
379 
/* Some systems spell MAP_ANONYMOUS as MAP_ANON, e.g. OS X (before Sierra),
 * OpenBSD etc.  Provide the former in terms of the latter when only the
 * latter exists. */
#if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
#  define MAP_ANONYMOUS MAP_ANON
#endif
385 
386 /* Which object file format are we targetting? */
387 #if defined(linux_HOST_OS) || defined(solaris2_HOST_OS) \
388 || defined(linux_android_HOST_OS) \
389 || defined(freebsd_HOST_OS) || defined(kfreebsdgnu_HOST_OS) \
390 || defined(dragonfly_HOST_OS) || defined(netbsd_HOST_OS) \
391 || defined(openbsd_HOST_OS) || defined(gnu_HOST_OS)
392 #  define OBJFORMAT_ELF
393 #  include "linker/ElfTypes.h"
394 #elif defined(mingw32_HOST_OS)
395 #  define OBJFORMAT_PEi386
396 #  include "linker/PEi386Types.h"
397 #elif defined(darwin_HOST_OS) || defined(ios_HOST_OS)
398 #  define OBJFORMAT_MACHO
399 #  include "linker/MachOTypes.h"
400 #else
401 #error "Unknown OBJECT_FORMAT for HOST_OS"
402 #endif
403 
/* In order to simplify control flow a bit, some references to mmap-related
 * definitions are blocked off by a C-level if statement rather than a
 * CPP-level #if statement.  Those branches are dead when
 * !RTS_LINKER_USE_MMAP, but they still have to compile, so the relevant
 * symbols are stubbed out here.
 *
 * MAP_ANONYMOUS is stubbed only when nothing has defined it yet.  It may
 * already exist at this point (through the MAP_ANON fallback earlier in this
 * header, or through a system header included elsewhere), and redefining a
 * macro with a different replacement list is diagnosed by the compiler.
 */
#if !RTS_LINKER_USE_MMAP
#define munmap(x,y) /* nothing */
#if !defined(MAP_ANONYMOUS)
#define MAP_ANONYMOUS 0
#endif
#endif
413 
414 #include "EndPrivate.h"
415