1 /**************************** macho.h **************************************** 2 * Author: Agner Fog 3 * Date created: 2007-01-06 4 * Last modified: 2008-05-23 5 * Project: objconv 6 * Module: macho.h 7 * Description: 8 * Header file for definition of data structures in 32 bit Mach-O object file. 9 * Also defines class MacSymbolTableBuilder 10 * Also defines structures for MacIntosh universal binaries 11 * 12 * Copyright 2006-2008 GNU General Public License http://www.gnu.org/licenses 13 * Parts (c) 2003 Apple public source license http://www.opensource.apple.com/apsl/ 14 ***********************************************************************************/ 15 #ifndef MACHO_H 16 #define MACHO_H 17 18 /********************** FILE HEADER **********************/ 19 20 struct MAC_header_32 { 21 uint32_t magic; // mach magic number identifier 22 uint32_t cputype; // cpu specifier 23 uint32_t cpusubtype; // machine specifier 24 uint32_t filetype; // type of file 25 uint32_t ncmds; // number of load commands 26 uint32_t sizeofcmds; // the size of all the load commands 27 uint32_t flags; // flags 28 }; 29 30 struct MAC_header_64 { 31 uint32_t magic; // mach magic number identifier 32 uint32_t cputype; // cpu specifier 33 uint32_t cpusubtype; // machine specifier 34 uint32_t filetype; // type of file 35 uint32_t ncmds; // number of load commands 36 uint32_t sizeofcmds; // the size of all the load commands 37 uint32_t flags; // flags 38 uint32_t reserved; // reserved for future use 39 }; 40 41 42 // Constant for the magic field of the MAC_header (32-bit architectures) 43 #define MAC_MAGIC_32 0xFEEDFACE // 32 bit little endian 44 #define MAC_MAGIC_64 0xFEEDFACF // 64 bit little endian 45 #define MAC_CIGAM_32 0xCEFAEDFE // 32 bit big endian 46 #define MAC_CIGAM_64 0xCFFAEDFE // 64 bit big endian 47 #define MAC_CIGAM_UNIV 0xBEBAFECA // MacIntosh universal binary 48 49 // Constants for cputype 50 #define MAC_CPU_TYPE_I386 7 51 #define MAC_CPU_TYPE_X86_64 0x1000007 52 #define 
MAC_CPU_TYPE_ARM 12 53 #define MAC_CPU_TYPE_SPARC 14 54 #define MAC_CPU_TYPE_POWERPC 18 55 #define MAC_CPU_TYPE_POWERPC64 0x1000012 56 57 // Constants for cpusubtype 58 #define MAC_CPU_SUBTYPE_I386_ALL 3 59 #define MAC_CPU_SUBTYPE_X86_64_ALL 3 60 #define MAC_CPU_SUBTYPE_ARM_ALL 0 61 #define MAC_CPU_SUBTYPE_SPARC_ALL 0 62 #define MAC_CPU_SUBTYPE_POWERPC_ALL 0 63 64 // Constants for the filetype field of the MAC_header 65 #define MAC_OBJECT 0x1 /* relocatable object file */ 66 #define MAC_EXECUTE 0x2 /* demand paged executable file */ 67 #define MAC_FVMLIB 0x3 /* fixed VM shared library file */ 68 #define MAC_CORE 0x4 /* core file */ 69 #define MAC_PRELOAD 0x5 /* preloaded executable file */ 70 #define MAC_DYLIB 0x6 /* dynamicly bound shared library file*/ 71 #define MAC_DYLINKER 0x7 /* dynamic link editor */ 72 #define MAC_BUNDLE 0x8 /* dynamicly bound bundle file */ 73 74 // Constants for the flags field of the MAC_header 75 #define MAC_NOUNDEFS 0x1 // the object file has no undefined references, can be executed 76 #define MAC_INCRLINK 0x2 // the object file is the output of an incremental link against a base file and can't be link edited again 77 #define MAC_DYLDLINK 0x4 // the object file is input for the dynamic linker and can't be staticly link edited again 78 #define MAC_BINDATLOAD 0x8 // the object file's undefined references are bound by the dynamic linker when loaded. 79 #define MAC_PREBOUND 0x10 // the file has it's dynamic undefined references prebound. 
/* Further bits of the flags field of the file header */
#define MAC_SPLIT_SEGS                0x20     /* read-only and read-write segments are split */
#define MAC_LAZY_INIT                 0x40     /* shared library init routine is run lazily by catching
                                                  memory faults to its writeable segments (obsolete) */
#define MAC_TWOLEVEL                  0x80     /* image uses two-level name space bindings */
#define MAC_FORCE_FLAT                0x100    /* executable forces all images to use flat name space
                                                  bindings */
#define MAC_NOMULTIDEFS               0x200    /* umbrella guarantees no multiple definitions of symbols
                                                  in its sub-images, so the two-level namespace hints
                                                  can always be used */
#define MAC_NOFIXPREBINDING           0x400    /* dyld must not notify the prebinding agent about this
                                                  executable */
#define MAC_PREBINDABLE               0x800    /* not prebound, but prebinding can be redone; only used
                                                  when MH_PREBOUND is not set */
#define MAC_ALLMODSBOUND              0x1000   /* binds to all two-level namespace modules of its
                                                  dependent libraries; only used when MH_PREBINDABLE
                                                  and MH_TWOLEVEL are both set */
#define MAC_SUBSECTIONS_VIA_SYMBOLS   0x2000   /* sections may safely be divided into sub-sections via
                                                  symbols for dead code stripping */
#define MAC_CANONICAL                 0x4000   /* binary has been canonicalized via the unprebind
                                                  operation */

//??
// Virtual memory protection bits.
// NOTE(review): presumably the values stored in the maxprot / initprot fields
// of the segment commands below -- the group is unlabeled in this file; confirm.
#define MAC_VM_PROT_NONE      0x00
#define MAC_VM_PROT_READ      0x01
#define MAC_VM_PROT_WRITE     0x02
#define MAC_VM_PROT_EXECUTE   0x04
#define MAC_VM_PROT_ALL       0x07

/* ============================ Load commands ============================ */

/* Leading fields shared by all load commands. */
struct MAC_load_command {
    uint32_t cmd;       // type of load command, see MAC_LC_...
    uint32_t cmdsize;   // total size of the command in bytes
};

/* Values of the cmd field of a load command */
#define MAC_LC_REQ_DYLD          0x80000000   /* bit added when an unknown command cannot be ignored */
#define MAC_LC_SEGMENT           0x1    // segment of this file to be mapped
#define MAC_LC_SYMTAB            0x2    // link-edit stab symbol table info
#define MAC_LC_SYMSEG            0x3    // link-edit gdb symbol table info (obsolete)
#define MAC_LC_THREAD            0x4    // thread
#define MAC_LC_UNIXTHREAD        0x5    // unix thread (includes a stack)
#define MAC_LC_LOADFVMLIB        0x6    // load a specified fixed VM shared library
#define MAC_LC_IDFVMLIB          0x7    // fixed VM shared library identification
#define MAC_LC_IDENT             0x8    // object identification info (obsolete)
#define MAC_LC_FVMFILE           0x9    // fixed VM file inclusion (internal use)
#define MAC_LC_PREPAGE           0xa    // prepage command (internal use)
#define MAC_LC_DYSYMTAB          0xb    // dynamic link-edit symbol table info
#define MAC_LC_LOAD_DYLIB        0xc    // load a dynamically linked shared library
#define MAC_LC_ID_DYLIB          0xd    // dynamically linked shared lib identification
#define MAC_LC_LOAD_DYLINKER     0xe    // load a dynamic linker
#define MAC_LC_ID_DYLINKER       0xf    // dynamic linker identification
#define MAC_LC_PREBOUND_DYLIB    0x10   // modules prebound for a dynamically linked shared library
#define MAC_LC_ROUTINES          0x11   // image routines
#define MAC_LC_SUB_FRAMEWORK     0x12   // sub framework
#define MAC_LC_SUB_UMBRELLA      0x13   // sub umbrella
#define MAC_LC_SUB_CLIENT        0x14   // sub client
#define MAC_LC_SUB_LIBRARY       0x15   // sub library
#define MAC_LC_TWOLEVEL_HINTS    0x16   // two-level namespace lookup hints
#define MAC_LC_PREBIND_CKSUM     0x17   // prebind checksum
#define MAC_LC_LOAD_WEAK_DYLIB   (0x18 | MAC_LC_REQ_DYLD)
#define MAC_LC_SEGMENT_64        0x19   // 64-bit segment of this file to be mapped
#define MAC_LC_ROUTINES_64       0x1a   // 64-bit image routines
#define MAC_LC_UUID              0x1b   // the uuid

/*
 * Segment load command: a part of the file is to be mapped into the task's
 * address space. The in-memory size (vmsize) may be equal to or larger than
 * the number of bytes mapped from the file (filesize). Mapping starts at
 * fileoff in the file and at vmaddr in memory; any remaining memory of the
 * segment is zero fill on demand. maxprot and initprot give the maximum and
 * the initial virtual memory protection. When the segment has sections, the
 * section structures follow the command directly and are counted in cmdsize.
 */
struct MAC_segment_command_32 {   // for 32-bit architectures
    uint32_t cmd;           // LC_SEGMENT
    uint32_t cmdsize;       // includes the size of the section structs
    char     segname[16];   // segment name
    uint32_t vmaddr;        // memory address of this segment
    uint32_t vmsize;        // memory size of this segment
    uint32_t fileoff;       // file offset of this segment
    uint32_t filesize;      // amount to map from the file
    uint32_t maxprot;       // maximum VM protection
    uint32_t initprot;      // initial VM protection
    uint32_t nsects;        // number of sections in the segment
    uint32_t flags;         // flags, see MAC_SG_...
};

/*
 * 64-bit segment load command: a part of the file is to be mapped into a
 * 64-bit task's address space. When the segment has sections, section_64
 * structures follow the command directly and are counted in cmdsize.
 */
struct MAC_segment_command_64 {   // for 64-bit architectures
    uint32_t cmd;           // LC_SEGMENT_64
    uint32_t cmdsize;       // includes the size of the section_64 structs
    char     segname[16];   // segment name
    uint64_t vmaddr;        // memory address of this segment
    uint64_t vmsize;        // memory size of this segment
    uint64_t fileoff;       // file offset of this segment
    uint64_t filesize;      // amount to map from the file
    uint32_t maxprot;       // maximum VM protection
    uint32_t initprot;      // initial VM protection
    uint32_t nsects;        // number of sections in the segment
    uint32_t flags;         // flags, see MAC_SG_...
};


/* Bits of the flags field of a segment command */
#define MAC_SG_HIGHVM    0x1   /* file contents are for the high part of the VM space; the low
                                  part is zero filled (for stacks in core files) */
#define MAC_SG_FVMLIB    0x2   /* segment is the VM allocated by a fixed VM library, for overlap
                                  checking in the link editor */
#define MAC_SG_NORELOC   0x4   /* nothing in this segment was relocated and nothing is relocated
                                  to it, so it may safely be replaced without relocation */

/*
 * A segment consists of zero or more sections. In non-MH_OBJECT files every
 * segment holds its proper sections and is padded to the specified segment
 * alignment by the link editor. The first segment of MH_EXECUTE and MH_FVMLIB
 * files contains the mach_header and the load commands before its first
 * section. Zero fill sections always come last in their segment (in all
 * formats), so that the zeroed segment padding can be mapped where zero fill
 * sections might be. Gigabyte zero fill sections (type S_GB_ZEROFILL) may
 * only share a segment with sections of the same type; such segments are
 * placed after all other segments.
 *
 * MH_OBJECT files keep all sections in one segment for compactness. There is
 * no padding to a segment boundary, and the mach_header and load commands
 * are not part of the segment.
 *
 * The link editor combines sections that have the same section name
 * (sectname) and go into the same segment (segname). The combined section
 * takes the maximum alignment of its parts; each part keeps its original
 * alignment inside the combined section, and padding bytes are zeroed.
 *
 * The relocation entries referenced by the reloff and nreloc fields are
 * described in the header file <reloc.h>.
 */
struct MAC_section_32 {   // for 32-bit architectures
    char     sectname[16];   // name of this section
    char     segname[16];    // segment this section goes in
    uint32_t addr;           // memory address of this section
    uint32_t size;           // size in bytes of this section
    uint32_t offset;         // file offset of this section
    uint32_t align;          // section alignment (power of 2)
    uint32_t reloff;         // file offset of relocation entries
    uint32_t nreloc;         // number of relocation entries
    uint32_t flags;          // flags (section type and attributes)
    uint32_t reserved1;      // reserved
    uint32_t reserved2;      // reserved
};

struct MAC_section_64 {   // for 64-bit architectures
    char     sectname[16];   // name of this section
    char     segname[16];    // segment this section goes in
    uint64_t addr;           // memory address of this section
    uint64_t size;           // size in bytes of this section
    uint32_t offset;         // file offset of this section
    uint32_t align;          // section alignment (power of 2)
    uint32_t reloff;         // file offset of relocation entries
    uint32_t nreloc;         // number of relocation entries
    uint32_t flags;          // flags (section type and attributes)
    uint32_t reserved1;      // reserved (for offset or index)
    uint32_t reserved2;      // reserved (for count or sizeof)
    uint32_t reserved3;      // reserved (Note: specified in loader.h, but not in MachORuntime.pdf)
};


/* The flags field of a section holds two parts: a section type and section
 * attributes. Types are mutually exclusive (a section has exactly one type);
 * attributes are not (a section may have several). */

#define MAC_SECTION_TYPE         0x000000ff   // 256 section types
#define MAC_SECTION_ATTRIBUTES   0xffffff00   // 24 section attributes

/* Section types */
#define MAC_S_REGULAR            0x0   // regular section
#define MAC_S_ZEROFILL           0x1   // zero fill on demand section
#define MAC_S_CSTRING_LITERALS   0x2   // section with only literal C strings
#define MAC_S_4BYTE_LITERALS     0x3   // section with only 4 byte literals
#define MAC_S_8BYTE_LITERALS     0x4   // section with only 8 byte literals
#define MAC_S_LITERAL_POINTERS   0x5   // section with only pointers to literals

/* The two kinds of symbol pointer sections and the symbol stubs section have
 * indirect symbol table entries. The entries for a section appear in the
 * indirect symbol table in the same order as the items in the section,
 * starting at the index stored in the section's reserved1 field. Their count
 * is the section size divided by the size of one item: 4 bytes for symbol
 * pointer sections, and the stub size stored in reserved2 for symbol stubs
 * sections. */

#define MAC_S_NON_LAZY_SYMBOL_POINTERS   0x6   /* section with only non-lazy symbol pointers */
#define MAC_S_LAZY_SYMBOL_POINTERS       0x7   /* section with only lazy symbol pointers */
#define MAC_S_SYMBOL_STUBS               0x8   /* section with only symbol stubs, byte size of stub in
                                                  the reserved2 field */
#define MAC_S_MOD_INIT_FUNC_POINTERS     0x9   /* section with only function pointers for initialization */
#define MAC_S_MOD_TERM_FUNC_POINTERS     0xa   /* section with only function pointers for termination */
#define MAC_S_COALESCED                  0xb   /* section contains symbols that are to be coalesced */
#define MAC_S_GB_ZEROFILL                0xc   /* zero fill on demand section that can be larger than
                                                  4 gigabytes */
#define MAC_S_INTERPOSING                0xd   /* section with only pairs of function pointers for
                                                  interposing */
#define MAC_S_16BYTE_LITERALS            0xe   /* section with only 16 byte literals */


/* Section attributes (upper part of the flags field of a section) */

#define MAC_SECTION_ATTRIBUTES_USR       0xff000000   // user settable attributes
#define MAC_S_ATTR_PURE_INSTRUCTIONS     0x80000000   // section contains only true machine instructions
#define MAC_S_ATTR_NO_TOC                0x40000000   // section contains coalesced symbols that are not to
                                                      // be in a ranlib table of contents
#define MAC_S_ATTR_STRIP_STATIC_SYMS     0x20000000   // ok to strip static symbols in this section in files
                                                      // with the MH_DYLDLINK flag
#define MAC_S_ATTR_NO_DEAD_STRIP         0x10000000   // no dead stripping
#define MAC_S_ATTR_LIVE_SUPPORT          0x08000000   // blocks are live if they reference live blocks
#define MAC_S_ATTR_SELF_MODIFYING_CODE   0x04000000   // used with i386 code stubs written on by dyld
#define MAC_S_ATTR_DEBUG                 0x02000000   // a debug section
#define MAC_SECTION_ATTRIBUTES_SYS       0x00ffff00   // system settable attributes
#define MAC_S_ATTR_SOME_INSTRUCTIONS     0x00000400   // section contains some machine instructions
#define MAC_S_ATTR_EXT_RELOC             0x00000200   // section has external relocation entries
#define MAC_S_ATTR_LOC_RELOC             0x00000100   // section has local relocation entries


/* Segment and section names are mostly meaningless to the link editor, but
 * traditional UNIX executables need the link editor and the assembler to
 * agree on a few names by convention.
 *
 * The initial protection of the "__TEXT" segment has write permission turned
 * off (not writeable).
 *
 * The link editor allocates common symbols at the end of the "__common"
 * section in the "__DATA" segment, creating the section and the segment when
 * needed. */

/* Currently known segment names and the section names in those segments */

#define MAC_SEG_PAGEZERO        "__PAGEZERO"        /* pagezero segment: no protections, catches NULL
                                                       references for MH_EXECUTE files */
#define MAC_SEG_TEXT            "__TEXT"            /* the traditional UNIX text segment */
#define MAC_SECT_TEXT           "__text"            /* the real text part of the text section, no headers
                                                       and no padding */
#define MAC_SECT_FVMLIB_INIT0   "__fvmlib_init0"    /* the fvmlib initialization section */
#define MAC_SECT_FVMLIB_INIT1   "__fvmlib_init1"    /* the section following the fvmlib initialization
                                                       section */
#define MAC_SEG_DATA            "__DATA"            /* the traditional UNIX data segment */
#define MAC_SECT_DATA           "__data"            /* the real initialized data section, no padding,
                                                       no bss overlap */
#define MAC_SECT_BSS            "__bss"             /* the real uninitialized data section, no padding */
#define MAC_SECT_COMMON         "__common"          /* section in which the link editor allocates common
                                                       symbols */
#define MAC_SEG_OBJC            "__OBJC"            /* objective-C runtime segment */
#define MAC_SECT_OBJC_SYMBOLS   "__symbol_table"    /* symbol table */
#define MAC_SECT_OBJC_MODULES   "__module_info"     /* module information */
#define MAC_SECT_OBJC_STRINGS   "__selector_strs"   /* string table */
#define MAC_SECT_OBJC_REFS      "__selector_refs"   /* string table */
#define MAC_SEG_ICON            "__ICON"            /* the NeXT icon segment */
#define MAC_SECT_ICON_HEADER    "__header"          /* the icon headers */
#define MAC_SECT_ICON_TIFF      "__tiff"            /* the icons in tiff format */
#define MAC_SEG_LINKEDIT        "__LINKEDIT"        /* segment containing all structs created and
                                                       maintained by the link editor. Created with the
                                                       -seglinkedit option to ld(1) for MH_EXECUTE and
                                                       FVMLIB file types only */
#define MAC_SEG_UNIXSTACK       "__UNIXSTACK"       /* the unix stack segment */
#define MAC_SEG_IMPORT          "__IMPORT"          /* segment for the self (dyld) modifying code stubs;
                                                       has read, write and execute permissions */


/* The symtab_command holds the offsets and sizes of the link-edit 4.3BSD
 * "stab" style symbol table information described in the header files
 * <nlist.h> and <stab.h>. */

struct MAC_symtab_command {
    uint32_t cmd;       // LC_SYMTAB
    uint32_t cmdsize;   // sizeof(MAC_symtab_command)
    uint32_t symoff;    // symbol table offset
    uint32_t nsyms;     // number of symbol table entries
    uint32_t stroff;    // string table offset
    uint32_t strsize;   // string table size in bytes
};

/* Second set of symbolic information, supporting the data structures of the
 * dynamic link editor.
 *
 * The original symbolic information of the symtab_command (symbol table and
 * string table) must also be present when this load command is present. The
 * symbol table is then organized into three groups:
 *     local symbols (static and debugging symbols) - grouped by module
 *     defined external symbols - grouped by module (sorted by name if not lib)
 *     undefined external symbols (sorted by name)
 * This load command gives an index and a count for each of the three groups.
 *
 * It also gives the offsets and sizes of these additional tables:
 *     table of contents
 *     module table
 *     reference symbol table
 *     indirect symbol table
 * The first three exist only in dynamically linked shared library files. For
 * executable and object modules, which contain only one module, the
 * equivalent information is obtained as follows:
 *     table of contents      - the defined external symbols, sorted by name
 *     module table           - everything in the file belongs to the one module
 *     reference symbol table - the defined and undefined external symbols
 *
 * For dynamically linked shared library files this load command further
 * gives offsets and sizes of the pool of relocation entries for all
 * sections, separated into two groups:
 *     external relocation entries
 *     local relocation entries
 * For executable and object modules the relocation entries continue to hang
 * off the section structures. */

struct MAC_dysymtab_command {
    uint32_t cmd;       // LC_DYSYMTAB
    uint32_t cmdsize;   // sizeof(struct dysymtab_command)

    /* The symbols indicated by symoff and nsyms of the LC_SYMTAB load command
     * fall into three groups:
     *     local symbols (further grouped by the module they are from)
     *     defined external symbols (further grouped by the module they are from)
     *     undefined symbols
     *
     * Local symbols are used only for debugging; the dynamic binding process
     * may use them to tell the debugger which local symbols belong to a
     * module that is being bound.
     *
     * The last two groups are used by the dynamic binding process to do the
     * binding (indirectly through the module table and the reference symbol
     * table when this is a dynamically linked shared library file). */

    uint32_t ilocalsym;    /* index to local symbols */
    uint32_t nlocalsym;    /* number of local symbols */

    uint32_t iextdefsym;   /* index to externally defined symbols */
    uint32_t nextdefsym;   /* number of externally defined symbols */

    uint32_t iundefsym;    /* index to undefined symbols */
    uint32_t nundefsym;    /* number of undefined symbols */

    /* The table of contents (analogous to the ranlib structure in an
     * archive) maps defined external symbols to the modules they are defined
     * in, so that the dynamic binding process can find the module of a
     * symbol. It exists only in a dynamically linked shared library file.
     * For executable and object modules the defined external symbols are
     * sorted by name and used as the table of contents. */

    uint32_t tocoff;   // file offset to table of contents
    uint32_t ntoc;     // number of entries in table of contents

    /* To support dynamic binding of "modules" (whole object files) the
     * symbol table must reflect the modules the file was created from. The
     * module table provides indexes and counts into the merged tables for
     * each module. It exists only in a dynamically linked shared library
     * file. For executable and object modules the file contains only one
     * module, so everything in the file belongs to that module. */

    uint32_t modtaboff;   // file offset to module table
    uint32_t nmodtab;     // number of module table entries

    /* To support dynamic module binding, the module structure of each module
     * records the external references (defined and undefined) the module
     * makes, as an offset and a count into the reference symbol table. This
     * table exists only in a dynamically linked shared library file. For
     * executable and object modules the defined and the undefined external
     * symbols indicate the external references. */

    uint32_t extrefsymoff;   // offset to referenced symbol table
    uint32_t nextrefsyms;    // number of referenced symbol table entries

    /* Sections containing "symbol pointers" and "routine stubs" have indexes
     * (and implied counts, from the section size and the fixed entry size)
     * into the "indirect symbol" table for each pointer and stub. For every
     * such section the index into the indirect symbol table is stored in the
     * reserved1 field of the section header. An indirect symbol table entry
     * is simply a 32-bit index into the symbol table, designating the symbol
     * that the pointer or stub refers to. The indirect symbol table is
     * ordered to match the entries in the section. */

    uint32_t indirectsymoff;   /* file offset to the indirect symbol table */
    uint32_t nindirectsyms;    /* number of indirect symbol table entries */

    /* To relocate an individual module in a library file quickly, the
     * external relocation entries of each module must be accessible
     * efficiently. Because relocation entries of a library file cannot be
     * reached through the section headers, they are separated into groups of
     * local and external entries, further grouped by module. In that case
     * the presence of this load command with non-zero extreloff, nextrel,
     * locreloff and nlocrel fields indicates that the relocation entries of
     * non-merged sections are not referenced through the section structures
     * (and that reloff and nreloc in the section headers are zero).
     *
     * Since the entries are not reached through the section headers,
     * r_address cannot be a section offset. It is instead the offset from
     * the vmaddr of the first LC_SEGMENT command.
     *
     * The relocation entries are grouped by module, and the module table
     * entries hold indexes and counts for the group of external relocation
     * entries of each module.
     *
     * Sections that are merged across modules must not have any remaining
     * external relocation entries (remaining relocation entries of merged
     * sections must be local). */

    uint32_t extreloff;   // offset to external relocation entries
    uint32_t nextrel;     // number of external relocation entries

    /* All local relocation entries are grouped together (not by module),
     * since they are only used if the object is moved from its statically
     * link edited address. */

    uint32_t locreloff;   // offset to local relocation entries
    uint32_t nlocrel;     // number of local relocation entries

};

/* An indirect symbol table entry is simply a 32-bit index into the symbol
 * table, designating the symbol that the pointer or stub refers to. The
 * exception is a non-lazy symbol pointer section entry for a defined symbol
 * which strip(1) has removed: it has the value INDIRECT_SYMBOL_LOCAL, with
 * INDIRECT_SYMBOL_ABS or'ed in if the symbol was also absolute. */

#define MAC_INDIRECT_SYMBOL_LOCAL   0x80000000
#define MAC_INDIRECT_SYMBOL_ABS     0x40000000

/* ============================ Relocation entries ============================ */

/* Format of a relocation entry of a Mach-O file, modified from the 4.3BSD
 * format. The value of the r_symbolnum field was changed for "local"
 * (r_extern == 0) relocation entries, which is required to support symbols
 * in an arbitrary number of sections rather than just the three sections
 * (text, data and bss) of a 4.3BSD file. In addition the last 4 bits carry
 * the r_type tag. */

#define R_SCATTERED   0x80000000   /* mask applied to the r_address field of a relocation_info
                                      structure to tell that it is really a
                                      scattered_relocation_info structure */

/* Non-scattered relocation entry. The inline value of the source is the
 * target address (pc-relative or absolute) if r_extern = 0, or an addend if
 * r_extern = 1. */
struct MAC_relocation_info {
    uint32_t r_address;          /* offset in the section to what is being relocated (source) */
    uint32_t r_symbolnum : 24;   /* symbol table index (0-based) if r_extern == 1, or section
                                    number (1-based) if r_extern == 0 */
    uint32_t r_pcrel     : 1;    /* pc relative. The target address (inline) is already pc relative */
    uint32_t r_length    : 2;    /* 0=byte, 1=word, 2=dword */
    uint32_t r_extern    : 1;    /* r_extern = 1 for symbols in symbol table */
    uint32_t r_type      : 4;    /* if not 0, machine specific relocation type */
};

/* Scattered relocation entry (r_scattered = 1, see R_SCATTERED). */
struct MAC_scattered_relocation_info {
    uint32_t r_address   : 24;   /* offset in the section to what is being relocated (source) */
    uint32_t r_type      : 4;    /* if not 0, machine specific relocation type */
    uint32_t r_length    : 2;    /* 0=byte, 1=word, 2=dword, 3=qword */
    uint32_t r_pcrel     : 1;    /* pc relative. The target address is already pc relative */
    uint32_t r_scattered : 1;    /* 1=scattered, 0=non-scattered (see above) */
    int32_t  r_value;            /* target address (without any offset added. The offset is
                                    stored inline in the source) */
};

/* 32-bit relocation types
 *
 * Relocation types used in a generic implementation. Relocation entries for
 * normal things use the generic relocation described above, with r_type
 * GENERIC_RELOC_VANILLA (a value of zero).
 *
 * GENERIC_RELOC_SECTDIFF supports the difference of two symbols defined in
 * different sections: the expression "symbol1 - symbol2 + constant" is
 * relocatable when both symbols are defined in some section. Both relocation
 * entries of this type are scattered entries. The value of symbol1 is stored
 * in the r_value field of the first entry, the value of symbol2 in the
 * r_value field of the pair entry.
 *
 * GENERIC_RELOC_PB_LA_PTR is a special case for a prebound lazy pointer,
 * needed to set the lazy pointer back to its non-prebound state. It is a
 * scattered relocation entry whose r_value field holds the value of the lazy
 * pointer when not prebound. */

/* My interpretation (A Fog):
   32-bit: Objects are not addressed by their offset into the section but by
   their "absolute" address. This "absolute" address has no reality.
   It is the address that the object would have if the section was placed
   at the address specified in the addr field of the section header.
   Scattered:
   The first record, of type MAC32_RELOC_SECTDIFF or MAC32_RELOC_LOCAL_SECTDIFF
   contains the "absolute" address of a first reference point, let's call it ref1,
   in the r_value field. The second record, of type MAC32_RELOC_PAIR contains the
   "absolute" address of a second reference point, ref2, in the r_value field.
   The inline value is the "absolute" address of the relocation target minus ref2.
   ref1 is often = target, but may be any label preceding the target. The linker
   has to add (ref1 - ref2) in image minus (ref1 - ref2) in object file to the
   inline value. The relocation source (the position of the inline field) is
   given in r_address in the first record, relative to the section.
   Non-scattered, absolute, r_extern = 1:
   r_symbolnum = symbol index (0-based)
   Non-scattered, absolute, r_extern = 0:
   r_symbolnum = section index, inline = absolute address of target?
   Non-scattered, r_pcrel = 1, r_extern = 1:
   r_symbolnum = symbol index (0-based)
   Inline = source absolute address - 4
   Non-scattered, r_pcrel = 1, r_extern = 0:
   r_symbolnum = section index,
   inline = absolute address of target - absolute address of source - 4
*/

#define MAC32_RELOC_VANILLA          0   /* generic relocation entry, both for addresses contained in
                                            data and for addresses contained in CPU instructions */
#define MAC32_RELOC_PAIR             1   /* second relocation entry of a pair. Only follows a
                                            GENERIC_RELOC_SECTDIFF */
#define MAC32_RELOC_SECTDIFF         2   /* relocation entry for an item that contains the difference
                                            of two section addresses. Generally used for
                                            position-independent code generation */
#define MAC32_RELOC_PB_LA_PTR        3   /* A relocation entry for a prebound lazy pointer. Always a
                                            scattered relocation entry. The r_value field contains
                                            the non-prebound value of the lazy pointer */
#define MAC32_RELOC_LOCAL_SECTDIFF   4   /* Similar to GENERIC_RELOC_SECTDIFF except that this entry
                                            refers specifically to the address in this item. If the
                                            address is that of a globally visible coalesced symbol,
                                            this relocation entry does not change if the symbol is
                                            overridden. This is used to associate stack unwinding
                                            information with the object code this relocation entry
                                            describes */

// 64-bit relocation types:
// Scattered relocations are not used in 64-bit Mach-O.
// reloc.h says that references to local symbols are made by the nearest
// preceding public symbol + displacement, but my experiments show that
// local symbol records are used, which of course is easier.
// r_extern = 1 is used even for non-external symbols!
// The target address is not stored inline. The -4 offset for self-relative
// addresses is implicit, unlike in 32-bit Mach-O. If the difference
// between source address and instruction pointer is e.g.
// -5, then the -4 is implicit, and the -1 is explicit!

#define MAC64_RELOC_UNSIGNED    0   // absolute address, 32 or 64 bits
#define MAC64_RELOC_SIGNED      1   // signed 32-bit displacement with implicit -4 addend
#define MAC64_RELOC_BRANCH      2   // same, used for CALL and JMP instructions
#define MAC64_RELOC_GOT_LOAD    3   // self-relative load of a GOT entry
#define MAC64_RELOC_GOT         4   // other GOT references
#define MAC64_RELOC_SUBTRACTOR  5   // must be followed by a X86_64_RELOC_UNSIGNED
#define MAC64_RELOC_SIGNED_1    6   // signed 32-bit displacement with implicit -4 addend and explicit -1 addend
#define MAC64_RELOC_SIGNED_2    7   // signed 32-bit displacement with implicit -4 addend and explicit -2 addend
#define MAC64_RELOC_SIGNED_4    8   // signed 32-bit displacement with implicit -4 addend and explicit -4 addend


// Symbol table entries
/* Format of a symbol table entry of a Mach-O file.  Modified from the BSD
 * format.  The modifications from the original format were changing n_other
 * (an unused field) to n_sect and the addition of the N_SECT type.  These
 * modifications are required to support symbols in an arbitrary number of
 * sections not just the three sections (text, data and bss) in a BSD file. */

// Symbol table entry with a 32-bit n_value
struct MAC_nlist_32 {
   uint32_t n_strx;                    // index into the string table
   uint8_t  n_type;                    // type flag, see below
   uint8_t  n_sect;                    // section number or NO_SECT
   int16_t  n_desc;                    // see <mach-o/stab.h>
   uint32_t n_value;                   // value of this symbol (or stab offset)
};

// Symbol table entry with a 64-bit n_value
struct MAC_nlist_64 {
   uint32_t n_strx;                    // index into the string table
   uint8_t  n_type;                    // type flag, see below
   uint8_t  n_sect;                    // section number or NO_SECT
   int16_t  n_desc;                    // see <mach-o/stab.h>
   uint64_t n_value;                   // value of this symbol (or stab offset)
};

/* Symbols with an index into the string table of zero are
 * defined to have a null, "", name. */

/* The n_type field really contains four fields:
 *      unsigned char N_STAB:3,
 *                    N_PEXT:1,
 *                    N_TYPE:3,
 *                    N_EXT:1;
 * which are used via the following masks. */

#define MAC_N_STAB  0xe0  /* if any of these bits set, a symbolic debugging entry */
#define MAC_N_PEXT  0x10  /* private external symbol bit */
#define MAC_N_TYPE  0x0e  /* mask for the type bits */
#define MAC_N_EXT   0x01  /* external symbol bit, set for external symbols */

/* Only symbolic debugging entries have some of the N_STAB bits set and if any
 * of these bits are set then it is a symbolic debugging entry (a stab).  In
 * which case then the values of the n_type field (the entire field) are given
 * in <mach-o/stab.h> */

// Values for N_TYPE bits of the n_type field.
#define MAC_N_UNDF  0x0   // undefined, n_sect == NO_SECT
#define MAC_N_ABS   0x2   // absolute, n_sect == NO_SECT
#define MAC_N_SECT  0xe   // defined in section number n_sect
#define MAC_N_PBUD  0xc   // prebound undefined (defined in a dylib)
#define MAC_N_INDR  0xa   // indirect

/* If the type is MAC_N_INDR then the symbol is defined to be the same as another
 * symbol.  In this case the n_value field is an index into the string table
 * of the other symbol's name.  When the other symbol is defined then they both
 * take on the defined type and value. */

/* If the type is MAC_N_SECT then the n_sect field contains an ordinal of the
 * section the symbol is defined in.  The sections are numbered from 1 and
 * refer to sections in order they appear in the load commands for the file
 * they are in.  This means the same ordinal may very well refer to different
 * sections in different files.
 *
 * The n_value field for all symbol table entries (including N_STAB's) gets
 * updated by the link editor based on the value of its n_sect field and where
 * the section n_sect references gets relocated.  If the value of the n_sect
 * field is NO_SECT then its n_value field is not changed by the link editor. */

#define MAC_NO_SECT   0     // symbol is not in any section
#define MAC_MAX_SECT  255   // 1 thru 255 inclusive

/* Common symbols are represented by undefined (N_UNDF) external (N_EXT) types
 * whose values (n_value) are non-zero.  In which case the value of the n_value
 * field is the size (in bytes) of the common symbol.  The n_sect field is set
 * to NO_SECT. */

/* To support the lazy binding of undefined symbols in the dynamic link-editor,
 * the undefined symbols in the symbol table (the nlist structures) are marked
 * with the indication if the undefined reference is a lazy reference or
 * non-lazy reference.  If both a non-lazy reference and a lazy reference is
 * made to the same symbol the non-lazy reference takes precedence.  A reference
 * is lazy only when all references to that symbol are made through a symbol
 * pointer in a lazy symbol pointer section.
 *
 * The implementation of marking nlist structures in the symbol table for
 * undefined symbols will be to use some of the bits of the n_desc field as a
 * reference type.  The mask REFERENCE_TYPE will be applied to the n_desc field
 * of an nlist structure for an undefined symbol to determine the type of
 * undefined reference (lazy or non-lazy).
 *
 * The constants for the REFERENCE FLAGS are propagated to the reference table
 * in a shared library file.  In that case the constant for a defined symbol,
 * REFERENCE_FLAG_DEFINED, is also used.
 */

/* Reference type bits of the n_desc field of undefined symbols */
#define MAC_REF_TYPE                             0xf
/* types of references */
#define MAC_REF_FLAG_UNDEFINED_NON_LAZY          0
#define MAC_REF_FLAG_UNDEFINED_LAZY              1
#define MAC_REF_FLAG_DEFINED                     2
#define MAC_REF_FLAG_PRIVATE_DEFINED             3
#define MAC_REF_FLAG_PRIVATE_UNDEFINED_NON_LAZY  4
#define MAC_REF_FLAG_PRIVATE_UNDEFINED_LAZY      5

/* To simplify stripping of objects that are used with the dynamic link
 * editor, the static link editor marks the symbols defined in an object that are
 * referenced by a dynamically bound object (dynamic shared libraries, bundles).
 * With this marking strip knows not to strip these symbols. */

/* The non-reference type bits of the n_desc field for global symbols are
 * reserved for the dynamic link editor.  All of these bits must start out
 * zero in the object file. */


// Additional n_desc flags
#define MAC_REFERENCED_DYNAMICALLY  0x10  // Must be set for any defined symbol that is referenced by dynamic-loader APIs
                                          // (such as dlsym and NSLookupSymbolInImage) and not ordinary undefined symbol references.
                                          // The strip tool uses this bit to avoid removing symbols that must exist:
                                          // If the symbol has this bit set, strip does not strip it.

#define MAC_N_DESC_DISCARDED        0x20  // Sometimes used by the dynamic linker at runtime in a fully linked image.
                                          // Do not set this bit in a fully linked image.
//#define MAC_N_DESC_DISCARDED 0x8000

#define MAC_N_NO_DEAD_STRIP         0x20  // When set in a relocatable object file (file type MH_OBJECT) on a defined symbol,
                                          // indicates to the static linker to never dead-strip the symbol.
                                          // (Note that the same bit (0x20) is used for two nonoverlapping purposes.)

#define MAC_N_WEAK_REF              0x40  // Indicates that this undefined symbol is a weak reference. If the dynamic linker cannot find a definition
                                          // for this symbol, it sets the address of this symbol to 0.
The static linker sets this symbol given the appropriate weak-linking flags. 734 735 #define MAC_N_WEAK_DEF 0x80 // Indicates that this symbol is a weak definition. If the static linker or the dynamic linker finds another 736 // (non-weak) definition for this symbol, theweak definition is ignored. Only symbols in a coalesced section (page 21) can be marked as a weak definition. 737 738 // Data structure used when sorting symbol table for Mach-O file in MacSymbolTableBuilder 739 template <class TMAC_nlist> 740 struct MacSymbolRecord : public TMAC_nlist { 741 uint32_t Name; // Index into MacSymbolTableBuilder::StringBuffer 742 int OldIndex; // Old symbol index 743 }; 744 745 // Class for building and storing symbol table, sorted or unsorted 746 template <class TMAC_nlist, class MInt> 747 class MacSymbolTableBuilder : public CMemoryBuffer { 748 int sorted; // Remember if list is sorted 749 CMemoryBuffer StringBuffer; // Temporary storage of symbol names 750 public: 751 MacSymbolTableBuilder(); // Constructor 752 void AddSymbol(int OldIndex, const char * name, int type, int Desc, int section, MInt value); // Add symbol to list 753 void SortList(); // Sort the list 754 int TranslateIndex(int OldIndex); // Translate old index to new index, after sorting 755 void StoreList(CMemoryBuffer * SymbolTable, CMemoryBuffer * StringTable); // Store sorted list in buffers 756 int Search(const char * name); // Search for name. 
-1 if not found 757 MacSymbolRecord<TMAC_nlist> & operator[] (uint32_t i); // Access member 758 }; 759 760 // structures for MacIntosh universal binaries 761 struct MAC_UNIV_FAT_HEADER { // File header for universal binary 762 uint32_t magic; // Magic number 0xCAFEBABE, big endian 763 uint32_t num_arch; // Number of members, big endian 764 }; 765 766 struct MAC_UNIV_FAT_ARCH { // Member pointer 767 uint32_t cputype; // cpu type 768 uint32_t cpusubtype; // cpu subtype 769 uint32_t offset; // file offset of member 770 uint32_t size; // size of member 771 uint32_t align; // alignment in file = 2^align 772 }; 773 774 // Structure used for list of sections that have relocations during disassembly 775 struct MAC_SECT_WITH_RELOC { 776 int32_t Section; // Section index 777 uint32_t SectOffset; // File offset of section binary data 778 uint32_t NumReloc; // Number of relocations records for this section 779 uint32_t ReltabOffset; // File offset of relocation table for this section 780 }; 781 782 /********************** Strings **********************/ 783 #define MAC_CONSTRUCTOR_NAME "__mod_init_func" // Name of constructors section 784 785 786 // Macros listing all word-size dependent structures, used as template parameter list 787 #define MACSTRUCTURES TMAC_header, TMAC_segment_command, TMAC_section, TMAC_nlist, MInt 788 #define MAC32STRUCTURES MAC_header_32, MAC_segment_command_32, MAC_section_32, MAC_nlist_32, int32_t 789 #define MAC64STRUCTURES MAC_header_64, MAC_segment_command_64, MAC_section_64, MAC_nlist_64, int64_t 790 791 #endif // #ifndef MACHO_H 792