1 /* ----------------------------------------------------------------------------- 2 * 3 * (c) The GHC Team, 2000 4 * 5 * RTS Object Linker 6 * 7 * ---------------------------------------------------------------------------*/ 8 9 #pragma once 10 11 #include "Rts.h" 12 #include "Hash.h" 13 #include "linker/M32Alloc.h" 14 15 #if RTS_LINKER_USE_MMAP 16 #include <sys/mman.h> 17 #endif 18 19 #include "BeginPrivate.h" 20 21 typedef void SymbolAddr; 22 typedef char SymbolName; 23 24 /* Hold extended information about a symbol in case we need to resolve it at a 25 late stage. */ 26 typedef struct _Symbol 27 { 28 SymbolName *name; 29 SymbolAddr *addr; 30 } Symbol_t; 31 32 /* Indication of section kinds for loaded objects. Needed by 33 the GC for deciding whether or not a pointer on the stack 34 is a code pointer. 35 See Note [BFD import library]. 36 */ 37 typedef 38 enum { /* Section is code or readonly. e.g. .text or .r(o)data. */ 39 SECTIONKIND_CODE_OR_RODATA, 40 /* Section contains read/write data. e.g. .data. */ 41 SECTIONKIND_RWDATA, 42 /* Static initializer section. e.g. .ctors. */ 43 SECTIONKIND_INIT_ARRAY, 44 /* Static finalizer section. e.g. .dtors. */ 45 SECTIONKIND_FINIT_ARRAY, 46 /* We don't know what the section is and don't care. */ 47 SECTIONKIND_OTHER, 48 /* Section contains debug information. e.g. .debug$. */ 49 SECTIONKIND_DEBUG, 50 /* Section belongs to an import section group. e.g. .idata$. */ 51 SECTIONKIND_IMPORT, 52 /* Section defines an import library entry, e.g. idata$7. */ 53 SECTIONKIND_IMPORT_LIBRARY, 54 SECTIONKIND_NOINFOAVAIL 55 } 56 SectionKind; 57 58 typedef 59 enum { SECTION_NOMEM, 60 SECTION_M32, 61 SECTION_MMAP, 62 SECTION_MALLOC 63 } 64 SectionAlloc; 65 66 /* Indicates a desired memory protection for pages within a segment. Defined as 67 * enum since it's more explicit and look nicer in a debugger. 68 * 69 * Can be used directly as a substitution for a combination of PROT_X flags on 70 * POSIX systems. 71 */ 72 typedef enum { 73 #if RTS_LINKER_USE_MMAP 74 SEGMENT_PROT_RO = PROT_READ, 75 SEGMENT_PROT_RX = PROT_READ | PROT_EXEC, 76 SEGMENT_PROT_RWO = PROT_READ | PROT_WRITE, 77 SEGMENT_PROT_RWX = PROT_READ | PROT_WRITE | PROT_EXEC 78 #else 79 SEGMENT_PROT_RO, 80 SEGMENT_PROT_RX, 81 SEGMENT_PROT_RWO, 82 SEGMENT_PROT_RWX 83 #endif 84 } SegmentProt; 85 86 /* 87 * Note [No typedefs for customizable types] 88 * Some pointer-to-struct types are defined opaquely 89 * first, and customized later to architecture/ABI-specific 90 * instantiations. Having the usual 91 * typedef struct _Foo {...} Foo; 92 * wrappers is hard to get right with older versions of GCC, 93 * so just have a 94 * struct Foo {...}; 95 * and always refer to it with the 'struct' qualifier. 96 */ 97 98 typedef 99 struct _Section { 100 void* start; /* actual start of section in memory */ 101 StgWord size; /* actual size of section in memory */ 102 SectionKind kind; 103 SectionAlloc alloc; 104 105 /* 106 * The following fields are relevant for SECTION_MMAP sections only 107 */ 108 StgWord mapped_offset; /* offset from the image of mapped_start */ 109 void* mapped_start; /* start of mmap() block */ 110 StgWord mapped_size; /* size of mmap() block */ 111 112 /* A customizable type to augment the Section type. 113 * See Note [No typedefs for customizable types] 114 */ 115 struct SectionFormatInfo* info; 116 } 117 Section; 118 119 typedef 120 struct _ProddableBlock { 121 void* start; 122 int size; 123 struct _ProddableBlock* next; 124 } 125 ProddableBlock; 126 127 typedef struct _Segment { 128 void *start; /* page aligned start address of a segment */ 129 size_t size; /* page rounded size of a segment */ 130 SegmentProt prot; /* mem protection to set after all symbols were 131 * resolved */ 132 133 int *sections_idx; /* an array of section indexes assigned to this segment */ 134 int n_sections; 135 } Segment; 136 137 #if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) || defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH) 138 #define NEED_SYMBOL_EXTRAS 1 139 #endif 140 141 /* 142 * We use the m32 allocator for symbol extras on Windows and other mmap-using 143 * platforms. 144 */ 145 #if RTS_LINKER_USE_MMAP 146 #define NEED_M32 1 147 #endif 148 149 /* Jump Islands are sniplets of machine code required for relative 150 * address relocations on the PowerPC, x86_64 and ARM. 151 */ 152 typedef struct { 153 #if defined(powerpc_HOST_ARCH) 154 struct { 155 short lis_r12, hi_addr; 156 short ori_r12_r12, lo_addr; 157 long mtctr_r12; 158 long bctr; 159 } jumpIsland; 160 #elif defined(x86_64_HOST_ARCH) 161 uint64_t addr; 162 uint8_t jumpIsland[6]; 163 #elif defined(arm_HOST_ARCH) 164 uint8_t jumpIsland[16]; 165 #endif 166 } SymbolExtra; 167 168 169 /* Top-level structure for an object module. One of these is allocated 170 * for each object file in use. 171 */ 172 typedef struct _ObjectCode { 173 OStatus status; 174 pathchar *fileName; 175 int fileSize; /* also mapped image size when using mmap() */ 176 char* formatName; /* eg "ELF32", "DLL", "COFF", etc. */ 177 178 /* If this object is a member of an archive, archiveMemberName is 179 * like "libarchive.a(object.o)". Otherwise it's NULL. 180 */ 181 char* archiveMemberName; 182 183 /* An array containing ptrs to all the symbol names copied from 184 this object into the global symbol hash table. This is so that 185 we know which parts of the latter mapping to nuke when this 186 object is removed from the system. */ 187 Symbol_t *symbols; 188 int n_symbols; 189 190 /* ptr to mem containing the object file image */ 191 char* image; 192 193 /* A customizable type, that formats can use to augment ObjectCode 194 * See Note [No typedefs for customizable types] 195 */ 196 struct ObjectCodeFormatInfo* info; 197 198 /* non-zero if the object file was mmap'd, otherwise malloc'd */ 199 int imageMapped; 200 201 /* record by how much image has been deliberately misaligned 202 after allocation, so that we can use realloc */ 203 int misalignment; 204 205 /* The section-kind entries for this object module. An array. */ 206 int n_sections; 207 Section* sections; 208 209 int n_segments; 210 Segment *segments; 211 212 // 213 // Garbage collection fields 214 // 215 216 // Next object in `objects` list 217 struct _ObjectCode *next; 218 219 // Previous object in `objects` list 220 struct _ObjectCode *prev; 221 222 // Next object in `loaded_objects` list 223 struct _ObjectCode *next_loaded_object; 224 225 // Mark bit 226 StgWord mark; 227 228 // Set of dependencies (ObjectCode*) of the object file. Traverse 229 // dependencies using `iterHashTable`. 230 // 231 // New entries are added as we resolve symbols in an object file, in 232 // `lookupDependentSymbol`. When an object file uses multiple symbols from 233 // another object file we add the dependent multiple times, so we use a 234 // `HashTable` here rather than a list/array to avoid copies. 235 // 236 // Used when unloading object files. See Note [Object unloading] in 237 // CheckUnload.c. 238 HashSet *dependencies; 239 240 // 241 // End of garbage collection fields 242 // 243 244 /* SANITY CHECK ONLY: a list of the only memory regions which may 245 safely be prodded during relocation. Any attempt to prod 246 outside one of these is an error in the linker. */ 247 ProddableBlock* proddables; 248 249 #if defined(ia64_HOST_ARCH) 250 /* Procedure Linkage Table for this object */ 251 void *plt; 252 unsigned int pltIndex; 253 #endif 254 255 #if defined(NEED_SYMBOL_EXTRAS) 256 SymbolExtra *symbol_extras; 257 unsigned long first_symbol_extra; 258 unsigned long n_symbol_extras; 259 #endif 260 /* Additional memory that is preallocated and contiguous with image 261 which can be used used to relocate bss sections. */ 262 char* bssBegin; 263 char* bssEnd; 264 265 /* a list of all ForeignExportsLists owned by this object */ 266 struct ForeignExportsList *foreign_exports; 267 268 /* Holds the list of symbols in the .o file which 269 require extra information.*/ 270 HashTable *extraInfos; 271 272 #if defined(NEED_M32) 273 /* The m32 allocators used for allocating small sections and symbol extras 274 * during loading. We have two: one for (writeable) data and one for 275 * (read-only/executable) code. */ 276 m32_allocator *rw_m32, *rx_m32; 277 #endif 278 } ObjectCode; 279 280 #define OC_INFORMATIVE_FILENAME(OC) \ 281 ( (OC)->archiveMemberName ? \ 282 (OC)->archiveMemberName : \ 283 (OC)->fileName \ 284 ) 285 286 #if defined(THREADED_RTS) 287 extern Mutex linker_mutex; 288 #endif 289 290 /* Type of the initializer */ 291 typedef void (*init_t) (int argc, char **argv, char **env); 292 293 /* SymbolInfo tracks a symbol's address, the object code from which 294 it originated, and whether or not it's weak. 295 296 RtsSymbolInfo is used to track the state of the symbols currently 297 loaded or to be loaded by the Linker. 298 299 Where the information in the `ObjectCode` is used to track the 300 original status of the symbol inside the `ObjectCode`. 301 302 A weak symbol that has been used will still be marked as weak 303 in the `ObjectCode` but in the `RtsSymbolInfo` it won't be. 304 */ 305 typedef struct _RtsSymbolInfo { 306 SymbolAddr* value; 307 ObjectCode *owner; 308 HsBool weak; 309 } RtsSymbolInfo; 310 311 void exitLinker( void ); 312 313 void freeObjectCode (ObjectCode *oc); 314 SymbolAddr* loadSymbol(SymbolName *lbl, RtsSymbolInfo *pinfo); 315 316 void *mmapAnonForLinker (size_t bytes); 317 void *mmapForLinker (size_t bytes, uint32_t prot, uint32_t flags, int fd, int offset); 318 void mmapForLinkerMarkExecutable (void *start, size_t len); 319 320 void addProddableBlock ( ObjectCode* oc, void* start, int size ); 321 void checkProddableBlock (ObjectCode *oc, void *addr, size_t size ); 322 void freeProddableBlocks (ObjectCode *oc); 323 324 void addSection (Section *s, SectionKind kind, SectionAlloc alloc, 325 void* start, StgWord size, StgWord mapped_offset, 326 void* mapped_start, StgWord mapped_size); 327 328 HsBool ghciLookupSymbolInfo(HashTable *table, 329 const SymbolName* key, RtsSymbolInfo **result); 330 331 int ghciInsertSymbolTable( 332 pathchar* obj_name, 333 HashTable *table, 334 const SymbolName* key, 335 SymbolAddr* data, 336 HsBool weak, 337 ObjectCode *owner); 338 339 /* Lock-free version of lookupSymbol. When 'dependent' is not NULL, adds it as a 340 * dependent to the owner of the symbol. */ 341 SymbolAddr* lookupDependentSymbol (SymbolName* lbl, ObjectCode *dependent); 342 343 extern /*Str*/HashTable *symhash; 344 345 pathchar* 346 resolveSymbolAddr (pathchar* buffer, int size, 347 SymbolAddr* symbol, uintptr_t* top); 348 349 /************************************************* 350 * Various bits of configuration 351 *************************************************/ 352 353 /* PowerPC and ARM have relative branch instructions with only 24 bit 354 * displacements and therefore need jump islands contiguous with each object 355 * code module. 356 */ 357 #if defined(powerpc_HOST_ARCH) 358 #define SHORT_REL_BRANCH 1 359 #endif 360 #if defined(arm_HOST_ARCH) 361 #define SHORT_REL_BRANCH 1 362 #endif 363 364 #if (RTS_LINKER_USE_MMAP && defined(SHORT_REL_BRANCH) && defined(linux_HOST_OS)) 365 #define USE_CONTIGUOUS_MMAP 1 366 #else 367 #define USE_CONTIGUOUS_MMAP 0 368 #endif 369 370 HsInt isAlreadyLoaded( pathchar *path ); 371 HsInt loadOc( ObjectCode* oc ); 372 ObjectCode* mkOc( pathchar *path, char *image, int imageSize, 373 bool mapped, char *archiveMemberName, 374 int misalignment 375 ); 376 377 void initSegment(Segment *s, void *start, size_t size, SegmentProt prot, int n_sections); 378 void freeSegments(ObjectCode *oc); 379 380 /* MAP_ANONYMOUS is MAP_ANON on some systems, 381 e.g. OS X (before Sierra), OpenBSD etc */ 382 #if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) 383 #define MAP_ANONYMOUS MAP_ANON 384 #endif 385 386 /* Which object file format are we targetting? */ 387 #if defined(linux_HOST_OS) || defined(solaris2_HOST_OS) \ 388 || defined(linux_android_HOST_OS) \ 389 || defined(freebsd_HOST_OS) || defined(kfreebsdgnu_HOST_OS) \ 390 || defined(dragonfly_HOST_OS) || defined(netbsd_HOST_OS) \ 391 || defined(openbsd_HOST_OS) || defined(gnu_HOST_OS) 392 # define OBJFORMAT_ELF 393 # include "linker/ElfTypes.h" 394 #elif defined(mingw32_HOST_OS) 395 # define OBJFORMAT_PEi386 396 # include "linker/PEi386Types.h" 397 #elif defined(darwin_HOST_OS) || defined(ios_HOST_OS) 398 # define OBJFORMAT_MACHO 399 # include "linker/MachOTypes.h" 400 #else 401 #error "Unknown OBJECT_FORMAT for HOST_OS" 402 #endif 403 404 /* In order to simplify control flow a bit, some references to mmap-related 405 definitions are blocked off by a C-level if statement rather than a CPP-level 406 #if statement. Since those are dead branches when !RTS_LINKER_USE_MMAP, we 407 just stub out the relevant symbols here 408 */ 409 #if !RTS_LINKER_USE_MMAP 410 #define munmap(x,y) /* nothing */ 411 #define MAP_ANONYMOUS 0 412 #endif 413 414 #include "EndPrivate.h" 415