1 /****************************    macho.h    ****************************************
2 * Author:        Agner Fog
3 * Date created:  2007-01-06
4 * Last modified: 2008-05-23
5 * Project:       objconv
6 * Module:        macho.h
7 * Description:
8 * Header file for definition of data structures in 32 bit Mach-O object file.
9 * Also defines class MacSymbolTableBuilder
10 * Also defines structures for MacIntosh universal binaries
11 *
12 * Copyright 2006-2008 GNU General Public License http://www.gnu.org/licenses
13 * Parts (c) 2003 Apple public source license http://www.opensource.apple.com/apsl/
14 ***********************************************************************************/
15 #ifndef MACHO_H
16 #define MACHO_H
17 
18 /********************** FILE HEADER **********************/
19 
20 struct MAC_header_32 {
21 	uint32_t	magic;		// mach magic number identifier
22 	uint32_t	cputype;	   // cpu specifier
23 	uint32_t	cpusubtype;	// machine specifier
24 	uint32_t	filetype;	// type of file
25 	uint32_t	ncmds;		// number of load commands
26 	uint32_t	sizeofcmds;	// the size of all the load commands
27 	uint32_t   flags;		// flags
28 };
29 
30 struct MAC_header_64 {
31 	uint32_t	magic;		// mach magic number identifier
32 	uint32_t	cputype;	   // cpu specifier
33 	uint32_t	cpusubtype;	// machine specifier
34 	uint32_t	filetype;	// type of file
35 	uint32_t	ncmds;		// number of load commands
36 	uint32_t	sizeofcmds;	// the size of all the load commands
37 	uint32_t   flags;		// flags
38    uint32_t   reserved;   // reserved for future use
39 };
40 
41 
42 // Constant for the magic field of the MAC_header (32-bit architectures)
43 #define	MAC_MAGIC_32   0xFEEDFACE  // 32 bit little endian
44 #define  MAC_MAGIC_64   0xFEEDFACF  // 64 bit little endian
45 #define	MAC_CIGAM_32   0xCEFAEDFE  // 32 bit big endian
46 #define  MAC_CIGAM_64   0xCFFAEDFE  // 64 bit big endian
47 #define	MAC_CIGAM_UNIV 0xBEBAFECA  // MacIntosh universal binary
48 
49 // Constants for cputype
50 #define MAC_CPU_TYPE_I386      7
51 #define MAC_CPU_TYPE_X86_64    0x1000007
52 #define MAC_CPU_TYPE_ARM       12
53 #define MAC_CPU_TYPE_SPARC     14
54 #define MAC_CPU_TYPE_POWERPC   18
55 #define MAC_CPU_TYPE_POWERPC64 0x1000012
56 
57 // Constants for cpusubtype
58 #define MAC_CPU_SUBTYPE_I386_ALL     3
59 #define MAC_CPU_SUBTYPE_X86_64_ALL   3
60 #define MAC_CPU_SUBTYPE_ARM_ALL      0
61 #define MAC_CPU_SUBTYPE_SPARC_ALL    0
62 #define MAC_CPU_SUBTYPE_POWERPC_ALL  0
63 
64 // Constants for the filetype field of the MAC_header
65 #define	MAC_OBJECT   0x1		/* relocatable object file */
66 #define	MAC_EXECUTE	 0x2		/* demand paged executable file */
67 #define	MAC_FVMLIB	 0x3		/* fixed VM shared library file */
68 #define	MAC_CORE		 0x4		/* core file */
69 #define	MAC_PRELOAD	 0x5		/* preloaded executable file */
70 #define	MAC_DYLIB	 0x6		/* dynamicly bound shared library file*/
71 #define	MAC_DYLINKER 0x7	   /* dynamic link editor */
72 #define	MAC_BUNDLE	 0x8		/* dynamicly bound bundle file */
73 
74 // Constants for the flags field of the MAC_header
75 #define MAC_NOUNDEFS                   0x1 // the object file has no undefined references, can be executed
76 #define MAC_INCRLINK                   0x2 // the object file is the output of an incremental link against a base file and can't be link edited again
77 #define MAC_DYLDLINK	                  0x4 // the object file is input for the dynamic linker and can't be staticly link edited again
78 #define MAC_BINDATLOAD                 0x8 // the object file's undefined references are bound by the dynamic linker when loaded.
79 #define MAC_PREBOUND	                 0x10 // the file has it's dynamic undefined references prebound.
80 #define MAC_SPLIT_SEGS                0x20 // the file has its read-only and read-write segments split
81 #define MAC_LAZY_INIT                 0x40 // the shared library init routine is to be run lazily via catching memory faults to its writeable segments (obsolete)
82 #define MAC_TWOLEVEL                  0x80 // the image is using two-level name space bindings
83 #define MAC_FORCE_FLAT               0x100 // the executable is forcing all images to use flat name space bindings
84 #define MAC_NOMULTIDEFS              0x200 // this umbrella guarantees no multiple defintions of symbols in its sub-images so the two-level namespace hints can always be used
85 #define MAC_NOFIXPREBINDING          0x400 // do not have dyld notify the prebinding agent about this executable
86 #define MAC_PREBINDABLE              0x800 // the binary is not prebound but can have its prebinding redone. only used when MH_PREBOUND is not set
87 #define MAC_ALLMODSBOUND            0x1000 // indicates that this binary binds to all two-level namespace modules of its dependent libraries. only used when MH_PREBINDABLE and MH_TWOLEVEL are both set
88 #define MAC_SUBSECTIONS_VIA_SYMBOLS 0x2000 // safe to divide up the sections into sub-sections via symbols for dead code stripping
89 #define MAC_CANONICAL               0x4000 // the binary has been canonicalized via the unprebind operation
90 
91 //??
92 #define MAC_VM_PROT_NONE           0x00
93 #define MAC_VM_PROT_READ           0x01
94 #define MAC_VM_PROT_WRITE          0x02
95 #define MAC_VM_PROT_EXECUTE        0x04
96 #define MAC_VM_PROT_ALL            0x07
97 
98 // Load commands
99 struct MAC_load_command {
100 	uint32_t cmd;		         // type of load command
101 	uint32_t cmdsize;	      // total size of command in bytes
102 };
103 
104 // Constants for the cmd field of all load commands, the type
105 #define MAC_LC_REQ_DYLD  0x80000000 // This bit is added if unknown command cannot be ignored
106 #define MAC_LC_SEGMENT          0x1 /* segment of this file to be mapped */
107 #define MAC_LC_SYMTAB	        0x2	/* link-edit stab symbol table info */
108 #define MAC_LC_SYMSEG	        0x3	/* link-edit gdb symbol table info (obsolete) */
109 #define MAC_LC_THREAD	        0x4	/* thread */
110 #define MAC_LC_UNIXTHREAD	     0x5	/* unix thread (includes a stack) */
111 #define MAC_LC_LOADFVMLIB	     0x6	/* load a specified fixed VM shared library */
112 #define MAC_LC_IDFVMLIB	        0x7	/* fixed VM shared library identification */
113 #define MAC_LC_IDENT	           0x8	/* object identification info (obsolete) */
114 #define MAC_LC_FVMFILE	        0x9	/* fixed VM file inclusion (internal use) */
115 #define MAC_LC_PREPAGE          0xa /* prepage command (internal use) */
116 #define MAC_LC_DYSYMTAB	        0xb	/* dynamic link-edit symbol table info */
117 #define MAC_LC_LOAD_DYLIB	     0xc	/* load a dynamicly linked shared library */
118 #define MAC_LC_ID_DYLIB	        0xd	/* dynamicly linked shared lib identification */
119 #define MAC_LC_LOAD_DYLINKER    0xe	/* load a dynamic linker */
120 #define MAC_LC_ID_DYLINKER	     0xf	/* dynamic linker identification */
121 #define MAC_LC_PREBOUND_DYLIB  0x10	/* modules prebound for a dynamicly linked shared library */
122 #define MAC_LC_ROUTINES	       0x11	/* image routines */
123 #define MAC_LC_SUB_FRAMEWORK   0x12 /* sub framework */
124 #define MAC_LC_SUB_UMBRELLA    0x13 /* sub umbrella */
125 #define MAC_LC_SUB_CLIENT      0x14 /* sub client */
126 #define MAC_LC_SUB_LIBRARY     0x15 /* sub library */
127 #define MAC_LC_TWOLEVEL_HINTS  0x16 /* two-level namespace lookup hints */
128 #define MAC_LC_PREBIND_CKSUM   0x17 /* prebind checksum */
129 #define MAC_LC_LOAD_WEAK_DYLIB (0x18 | MAC_LC_REQ_DYLD)
130 #define MAC_LC_SEGMENT_64      0x19 /* 64-bit segment of this file to be mapped */
131 #define MAC_LC_ROUTINES_64     0x1a /* 64-bit image routines */
132 #define MAC_LC_UUID            0x1b /* the uuid */
133 
134 /*
135  * The segment load command indicates that a part of this file is to be
136  * mapped into the task's address space.  The size of this segment in memory,
137  * vmsize, maybe equal to or larger than the amount to map from this file,
138  * filesize.  The file is mapped starting at fileoff to the beginning of
139  * the segment in memory, vmaddr.  The rest of the memory of the segment,
140  * if any, is allocated zero fill on demand.  The segment's maximum virtual
141  * memory protection and initial virtual memory protection are specified
142  * by the maxprot and initprot fields.  If the segment has sections then the
143  * section structures directly follow the segment command and their size is
144  * reflected in cmdsize.
145  */
146 struct MAC_segment_command_32 {	/* for 32-bit architectures */
147 	uint32_t	cmd;		      /* LC_SEGMENT */
148 	uint32_t	cmdsize;	      /* includes sizeof section structs */
149 	char		segname[16];	/* segment name */
150 	uint32_t	vmaddr;		   /* memory address of this segment */
151 	uint32_t	vmsize;		   /* memory size of this segment */
152 	uint32_t	fileoff;	      /* file offset of this segment */
153 	uint32_t	filesize;	   /* amount to map from the file */
154 	uint32_t	maxprot;    	/* maximum VM protection */
155 	uint32_t	initprot;	   /* initial VM protection */
156 	uint32_t	nsects;		   /* number of sections in segment */
157 	uint32_t	flags;		   /* flags */
158 };
159 
160 /*
161  * The 64-bit segment load command indicates that a part of this file is to be
162  * mapped into a 64-bit task's address space.  If the 64-bit segment has
163  * sections then section_64 structures directly follow the 64-bit segment
164  * command and their size is reflected in cmdsize.
165  */
166 struct MAC_segment_command_64 {	/* for 64-bit architectures */
167 	uint32_t	cmd;		    /* LC_SEGMENT_64 */
168 	uint32_t	cmdsize;	    /* includes sizeof section_64 structs */
169 	char		segname[16]; /* segment name */
170 	uint64_t	vmaddr;		 /* memory address of this segment */
171 	uint64_t	vmsize;		 /* memory size of this segment */
172 	uint64_t	fileoff;	    /* file offset of this segment */
173 	uint64_t	filesize;	 /* amount to map from the file */
174 	uint32_t	maxprot;	    /* maximum VM protection */
175 	uint32_t	initprot;	 /* initial VM protection */
176 	uint32_t	nsects;		 /* number of sections in segment */
177 	uint32_t	flags;		 /* flags */
178 };
179 
180 
181 /* Constants for the flags field of the segment_command */
182 #define	MAC_SG_HIGHVM	0x1	// the file contents for this segment is for the high part of the
183                               // VM space, the low part is zero filled (for stacks in core files)
184 #define	MAC_SG_FVMLIB	0x2	// this segment is the VM that is allocated by a fixed VM library,
185                               // for overlap checking in the link editor
186 #define	MAC_SG_NORELOC	0x4	// this segment has nothing that was relocated in it and nothing
187                               // relocated to it, that is it maybe safely replaced without relocation
188 
189 /*
190  * A segment is made up of zero or more sections.  Non-MH_OBJECT files have
191  * all of their segments with the proper sections in each, and padded to the
192  * specified segment alignment when produced by the link editor.  The first
193  * segment of a MH_EXECUTE and MH_FVMLIB format file contains the mach_header
194  * and load commands of the object file before it's first section.  The zero
195  * fill sections are always last in their segment (in all formats).  This
196  * allows the zeroed segment padding to be mapped into memory where zero fill
197  * sections might be. The gigabyte zero fill sections, those with the section
198  * type S_GB_ZEROFILL, can only be in a segment with sections of this type.
199  * These segments are then placed after all other segments.
200  *
201  * The MH_OBJECT format has all of it's sections in one segment for
202  * compactness.  There is no padding to a specified segment boundary and the
203  * mach_header and load commands are not part of the segment.
204  *
205  * Sections with the same section name, sectname, going into the same segment,
206  * segname, are combined by the link editor.  The resulting section is aligned
207  * to the maximum alignment of the combined sections and is the new section's
208  * alignment.  The combined sections are aligned to their original alignment in
209  * the combined section.  Any padded bytes to get the specified alignment are
210  * zeroed.
211  *
212  * The format of the relocation entries referenced by the reloff and nreloc
213  * fields of the section structure for mach object files is described in the
214  * header file <reloc.h>.
215  */
216 struct MAC_section_32 {	      /* for 32-bit architectures */
217 	char		sectname[16];	/* name of this section */
218 	char		segname[16];	/* segment this section goes in */
219 	uint32_t	addr;		      /* memory address of this section */
220 	uint32_t	size;		      /* size in bytes of this section */
221 	uint32_t	offset;	   	/* file offset of this section */
222 	uint32_t	align;		   /* section alignment (power of 2) */
223 	uint32_t	reloff;		   /* file offset of relocation entries */
224 	uint32_t	nreloc;		   /* number of relocation entries */
225 	uint32_t	flags;		   /* flags (section type and attributes)*/
226 	uint32_t	reserved1;	   /* reserved */
227 	uint32_t	reserved2;	   /* reserved */
228 };
229 
230 struct MAC_section_64 {    /* for 64-bit architectures */
231 	char		sectname[16];	/* name of this section */
232 	char		segname[16];	/* segment this section goes in */
233 	uint64_t	addr;		      /* memory address of this section */
234 	uint64_t	size;		      /* size in bytes of this section */
235 	uint32_t	offset;		   /* file offset of this section */
236 	uint32_t	align;		   /* section alignment (power of 2) */
237 	uint32_t	reloff;		   /* file offset of relocation entries */
238 	uint32_t	nreloc;		   /* number of relocation entries */
239 	uint32_t	flags;		   /* flags (section type and attributes)*/
240 	uint32_t	reserved1;	   /* reserved (for offset or index) */
241 	uint32_t	reserved2;	   /* reserved (for count or sizeof) */
242 	uint32_t	reserved3;	   // reserved (Note: specified in loader.h, but not in MachORuntime.pdf)
243 };
244 
245 
246 /* The flags field of a section structure is separated into two parts a section
247  * type and section attributes.  The section types are mutually exclusive (it
248  * can only have one type) but the section attributes are not (it may have more
249  * than one attribute).  */
250 
251 #define MAC_SECTION_TYPE		    0x000000ff	/* 256 section types */
252 #define MAC_SECTION_ATTRIBUTES	 0xffffff00	/*  24 section attributes */
253 
254 /* Constants for the type of a section */
255 #define	MAC_S_REGULAR		      0x0	 /* regular section */
256 #define	MAC_S_ZEROFILL		      0x1	 /* zero fill on demand section */
257 #define	MAC_S_CSTRING_LITERALS  0x2	 /* section with only literal C strings*/
258 #define	MAC_S_4BYTE_LITERALS	   0x3    /* section with only 4 byte literals */
259 #define	MAC_S_8BYTE_LITERALS	   0x4	 /* section with only 8 byte literals */
260 #define	MAC_S_LITERAL_POINTERS  0x5	 /* section with only pointers to literals */
261 
262 /* For the two types of symbol pointers sections and the symbol stubs section
263  * they have indirect symbol table entries.  For each of the entries in the
264  * section the indirect symbol table entries, in corresponding order in the
265  * indirect symbol table, start at the index stored in the reserved1 field
266  * of the section structure.  Since the indirect symbol table entries
267  * correspond to the entries in the section the number of indirect symbol table
268  * entries is inferred from the size of the section divided by the size of the
269  * entries in the section.  For symbol pointers sections the size of the entries
270  * in the section is 4 bytes and for symbol stubs sections the byte size of the
271  * stubs is stored in the reserved2 field of the section structure. */
272 
273 #define  MAC_S_NON_LAZY_SYMBOL_POINTERS	0x6  // section with only non-lazy symbol pointers
274 #define  MAC_S_LAZY_SYMBOL_POINTERS		   0x7  // section with only lazy symbol pointers
275 #define  MAC_S_SYMBOL_STUBS	            0x8  // section with only symbol stubs, byte size of stub in the reserved2 field
276 #define  MAC_S_MOD_INIT_FUNC_POINTERS	   0x9  // section with only function pointers for initialization
277 #define  MAC_S_MOD_TERM_FUNC_POINTERS	   0xa  // section with only function pointers for termination
278 #define  MAC_S_COALESCED                  0xb  // section contains symbols that are to be coalesced
279 #define  MAC_S_GB_ZEROFILL                0xc  // zero fill on demand section that can be larger than 4 gigabytes
280 #define  MAC_S_INTERPOSING                0xd  // section with only pairs of function pointers for interposing
281 #define  MAC_S_16BYTE_LITERALS            0xe  // section with only 16 byte literals
282 
283 
284 // Constants for the section attributes part of the flags field of a section structure.
285 
286 #define MAC_SECTION_ATTRIBUTES_USR	  0xff000000	/* User setable attributes */
287 #define MAC_S_ATTR_PURE_INSTRUCTIONS  0x80000000	/* section contains only true machine instructions */
288 #define MAC_S_ATTR_NO_TOC             0x40000000	/* section contains coalesced symbols that are not to be in a ranlib table of contents */
289 #define MAC_S_ATTR_STRIP_STATIC_SYMS  0x20000000	/* ok to strip static symbols in this section in files with the MH_DYLDLINK flag */
290 #define MAC_S_ATTR_NO_DEAD_STRIP      0x10000000	/* no dead stripping */
291 #define MAC_S_ATTR_LIVE_SUPPORT       0x08000000	/* blocks are live if they reference live blocks */
292 #define MAC_S_ATTR_SELF_MODIFYING_CODE 0x04000000	/* Used with i386 code stubs written on by dyld */
293 #define MAC_S_ATTR_DEBUG              0x02000000	/* a debug section */
294 #define MAC_SECTION_ATTRIBUTES_SYS	  0x00ffff00	/* system setable attributes */
295 #define MAC_S_ATTR_SOME_INSTRUCTIONS  0x00000400	/* section contains some machine instructions */
296 #define MAC_S_ATTR_EXT_RELOC	        0x00000200	/* section has external relocation entries */
297 #define MAC_S_ATTR_LOC_RELOC	        0x00000100	/* section has local relocation entries */
298 
299 
300 /* The names of segments and sections in them are mostly meaningless to the
301  * link-editor.  But there are few things to support traditional UNIX
302  * executables that require the link-editor and assembler to use some names
303  * agreed upon by convention.
304  *
305  * The initial protection of the "__TEXT" segment has write protection turned
306  * off (not writeable).
307  *
308  * The link-editor will allocate common symbols at the end of the "__common"
309  * section in the "__DATA" segment.  It will create the section and segment
310  * if needed. */
311 
312 /* The currently known segment names and the section names in those segments */
313 
314 #define	MAC_SEG_PAGEZERO	    "__PAGEZERO"      // the pagezero segment which has no protections and catches NULL references for MH_EXECUTE files
315 #define	MAC_SEG_TEXT	       "__TEXT"          // the tradition UNIX text segment
316 #define	MAC_SECT_TEXT	       "__text"          // the real text part of the text section no headers, and no padding
317 #define  MAC_SECT_FVMLIB_INIT0 "__fvmlib_init0"  // the fvmlib initialization section
318 #define  MAC_SECT_FVMLIB_INIT1 "__fvmlib_init1"  // the section following the fvmlib initialization section
319 #define	MAC_SEG_DATA	       "__DATA"	       // the tradition UNIX data segment
320 #define	MAC_SECT_DATA	       "__data"          // the real initialized data section no padding, no bss overlap
321 #define	MAC_SECT_BSS	       "__bss"		       // the real uninitialized data section no padding
322 #define  MAC_SECT_COMMON	    "__common"	       // the section common symbols are allocated in by the link editor
323 #define	MAC_SEG_OBJC	       "__OBJC"	       // objective-C runtime segment
324 #define  MAC_SECT_OBJC_SYMBOLS "__symbol_table"	 // symbol table
325 #define  MAC_SECT_OBJC_MODULES "__module_info"	 // module information
326 #define  MAC_SECT_OBJC_STRINGS "__selector_strs" // string table
327 #define  MAC_SECT_OBJC_REFS    "__selector_refs" // string table
328 #define	MAC_SEG_ICON	       "__ICON"          // the NeXT icon segment
329 #define	MAC_SECT_ICON_HEADER  "__header"        // the icon headers
330 #define	MAC_SECT_ICON_TIFF    "__tiff"          // the icons in tiff format
331 #define	MAC_SEG_LINKEDIT	    "__LINKEDIT"      // the segment containing all structs created and maintained by the link editor.  Created with -seglinkedit option to ld(1) for MH_EXECUTE and FVMLIB file types only
332 #define  MAC_SEG_UNIXSTACK	    "__UNIXSTACK"	    // the unix stack segment
333 #define  MAC_SEG_IMPORT        "__IMPORT"        // the segment for the self (dyld) modifing code stubs that has read, write and execute permissions
334 
335 
336 /* The symtab_command contains the offsets and sizes of the link-edit 4.3BSD
337  * "stab" style symbol table information as described in the header files
338  * <nlist.h> and <stab.h>. */
339 
340 struct MAC_symtab_command {
341 	uint32_t	cmd;		   /* LC_SYMTAB */
342 	uint32_t	cmdsize;	   /* sizeof(MAC_symtab_command) */
343 	uint32_t	symoff;		/* symbol table offset */
344 	uint32_t	nsyms;		/* number of symbol table entries */
345 	uint32_t	stroff;		/* string table offset */
346 	uint32_t	strsize;	   /* string table size in bytes */
347 };
348 
349 /* This is the second set of the symbolic information which is used to support
350  * the data structures for the dynamicly link editor.
351  *
352  * The original set of symbolic information in the symtab_command which contains
353  * the symbol and string tables must also be present when this load command is
354  * present.  When this load command is present the symbol table is organized
355  * into three groups of symbols:
356  *	local symbols (static and debugging symbols) - grouped by module
357  *	defined external symbols - grouped by module (sorted by name if not lib)
358  *	undefined external symbols (sorted by name)
359  * In this load command there are offsets and counts to each of the three groups
360  * of symbols.
361  *
362  * This load command contains a the offsets and sizes of the following new
363  * symbolic information tables:
364  *	table of contents
365  *	module table
366  *	reference symbol table
367  *	indirect symbol table
368  * The first three tables above (the table of contents, module table and
369  * reference symbol table) are only present if the file is a dynamicly linked
370  * shared library.  For executable and object modules, which are files
371  * containing only one module, the information that would be in these three
372  * tables is determined as follows:
373  * 	table of contents - the defined external symbols are sorted by name
374  *	module table - the file contains only one module so everything in the
375  *		       file is part of the module.
376  *	reference symbol table - is the defined and undefined external symbols
377  *
378  * For dynamicly linked shared library files this load command also contains
379  * offsets and sizes to the pool of relocation entries for all sections
380  * separated into two groups:
381  *	external relocation entries
382  *	local relocation entries
383  * For executable and object modules the relocation entries continue to hang
384  * off the section structures.  */
385 
386 struct MAC_dysymtab_command {
387     uint32_t cmd;		/* LC_DYSYMTAB */
388     uint32_t cmdsize;	/* sizeof(struct dysymtab_command) */
389 
390     /* The symbols indicated by symoff and nsyms of the LC_SYMTAB load command
391      * are grouped into the following three groups:
392      *    local symbols (further grouped by the module they are from)
393      *    defined external symbols (further grouped by the module they are from)
394      *    undefined symbols
395      *
396      * The local symbols are used only for debugging.  The dynamic binding
397      * process may have to use them to indicate to the debugger the local
398      * symbols for a module that is being bound.
399      *
400      * The last two groups are used by the dynamic binding process to do the
401      * binding (indirectly through the module table and the reference symbol
402      * table when this is a dynamicly linked shared library file).    */
403 
404     uint32_t ilocalsym;	// index to local symbols
405     uint32_t nlocalsym;	// number of local symbols
406 
407     uint32_t iextdefsym;	// index to externally defined symbols
408     uint32_t nextdefsym;	// number of externally defined symbols
409 
410     uint32_t iundefsym;	// index to undefined symbols
411     uint32_t nundefsym;	// number of undefined symbols
412 
413     /* For the dynamic binding process to find which module a symbol
414      * is defined in the table of contents is used (analogous to the ranlib
415      * structure in an archive) which maps defined external symbols to modules
416      * they are defined in.  This exists only in a dynamicly linked shared
417      * library file.  For executable and object modules the defined external
418      * symbols are sorted by name and is use as the table of contents.     */
419 
420     uint32_t tocoff;	/* file offset to table of contents */
421     uint32_t ntoc;		/* number of entries in table of contents */
422 
423     /* To support dynamic binding of "modules" (whole object files) the symbol
424      * table must reflect the modules that the file was created from.  This is
425      * done by having a module table that has indexes and counts into the merged
426      * tables for each module.  The module structure that these two entries
427      * refer to is described below.  This exists only in a dynamicly linked
428      * shared library file.  For executable and object modules the file only
429      * contains one module so everything in the file belongs to the module.     */
430 
431     uint32_t modtaboff;	/* file offset to module table */
432     uint32_t nmodtab;	   /* number of module table entries */
433 
434     /* To support dynamic module binding the module structure for each module
435      * indicates the external references (defined and undefined) each module
436      * makes.  For each module there is an offset and a count into the
437      * reference symbol table for the symbols that the module references.
438      * This exists only in a dynamicly linked shared library file.  For
439      * executable and object modules the defined external symbols and the
440      * undefined external symbols indicates the external references.     */
441 
442     uint32_t extrefsymoff;  /* offset to referenced symbol table */
443     uint32_t nextrefsyms;	  /* number of referenced symbol table entries */
444 
445     /* The sections that contain "symbol pointers" and "routine stubs" have
446      * indexes and (implied counts based on the size of the section and fixed
447      * size of the entry) into the "indirect symbol" table for each pointer
448      * and stub.  For every section of these two types the index into the
449      * indirect symbol table is stored in the section header in the field
450      * reserved1.  An indirect symbol table entry is simply a 32bit index into
451      * the symbol table to the symbol that the pointer or stub is referring to.
452      * The indirect symbol table is ordered to match the entries in the section. */
453 
454     uint32_t indirectsymoff; // file offset to the indirect symbol table
455     uint32_t nindirectsyms;  // number of indirect symbol table entries
456 
457     /* To support relocating an individual module in a library file quickly the
458      * external relocation entries for each module in the library need to be
459      * accessed efficiently.  Since the relocation entries can't be accessed
460      * through the section headers for a library file they are separated into
461      * groups of local and external entries further grouped by module.  In this
462      * case the presents of this load command who's extreloff, nextrel,
463      * locreloff and nlocrel fields are non-zero indicates that the relocation
464      * entries of non-merged sections are not referenced through the section
465      * structures (and the reloff and nreloc fields in the section headers are
466      * set to zero).
467      *
468      * Since the relocation entries are not accessed through the section headers
469      * this requires the r_address field to be something other than a section
470      * offset to identify the item to be relocated.  In this case r_address is
471      * set to the offset from the vmaddr of the first LC_SEGMENT command.
472      *
473      * The relocation entries are grouped by module and the module table
474      * entries have indexes and counts into them for the group of external
475      * relocation entries for that the module.
476      *
477      * For sections that are merged across modules there must not be any
478      * remaining external relocation entries for them (for merged sections
479      * remaining relocation entries must be local).     */
480 
481     uint32_t extreloff;	/* offset to external relocation entries */
482     uint32_t nextrel;	   /* number of external relocation entries */
483 
484     /* All the local relocation entries are grouped together (they are not
485      * grouped by their module since they are only used if the object is moved
486      * from it staticly link edited address).     */
487 
488     uint32_t locreloff;	/* offset to local relocation entries */
489     uint32_t nlocrel;	/* number of local relocation entries */
490 
491 };
492 
493 /* An indirect symbol table entry is simply a 32bit index into the symbol table
494  * to the symbol that the pointer or stub is refering to.  Unless it is for a
495  * non-lazy symbol pointer section for a defined symbol which strip(1) as
496  * removed.  In which case it has the value INDIRECT_SYMBOL_LOCAL.  If the
497  * symbol was also absolute INDIRECT_SYMBOL_ABS is or'ed with that. */
498 
499 #define MAC_INDIRECT_SYMBOL_LOCAL  0x80000000
500 #define MAC_INDIRECT_SYMBOL_ABS    0x40000000
501 
502 // Relocation entries
503 /* Format of a relocation entry of a Mach-O file.  Modified from the 4.3BSD
504  * format.  The modifications from the original format were changing the value
505  * of the r_symbolnum field for "local" (r_extern == 0) relocation entries.
506  * This modification is required to support symbols in an arbitrary number of
507  * sections not just the three sections (text, data and bss) in a 4.3BSD file.
508  * Also the last 4 bits have had the r_type tag added to them. */
509 
510 #define R_SCATTERED 0x80000000	// mask to be applied to the r_address field of a relocation_info structure to tell that
511                                  // is is really a scattered_relocation_info stucture
512 
513 struct MAC_relocation_info {
514    uint32_t  r_address;      // offset in the section to what is being relocated (source)
515    uint32_t  r_symbolnum:24, // symbol table index (0-based) if r_extern == 1 or section number (1-based) if r_extern == 0
516            r_pcrel:1,      // pc relative. The target address (inline) is already pc relative
517            r_length:2,     // 0=byte, 1=word, 2=dword
518            r_extern:1,     // r_extern = 1 for symbols in symbol table
519            r_type:4;       // if not 0, machine specific relocation type
520 };                         // The inline value of the source is the target address (pc-relative
521                            // or absolute) if r_extern = 0, or an addend if r_extern = 1.
522 
523 struct MAC_scattered_relocation_info {
524    uint32_t  r_address:24,   // offset in the section to what is being relocated (source)
525            r_type:4,       // if not 0, machine specific relocation type
526            r_length:2,     // 0=byte, 1=word, 2=dword, 3=qword
527            r_pcrel:1,      // pc relative. The target address is already pc relative
528            r_scattered:1;  // 1=scattered, 0=non-scattered (see above)
529    int32_t   r_value;        // target address (without any offset added. The offset is stored inline in the source)
530 };
531 
532 // 32-bit relocation types:
533 /* Relocation types used in a generic implementation.  Relocation entries for
534  * normal things use the generic relocation as discribed above and their r_type
535  * is GENERIC_RELOC_VANILLA (a value of zero).
536  *
537  * Another type of generic relocation, GENERIC_RELOC_SECTDIFF, is to support
538  * the difference of two symbols defined in different sections.  That is the
539  * expression "symbol1 - symbol2 + constant" is a relocatable expression when
540  * both symbols are defined in some section.  For this type of relocation
541  * both relocations entries are scattered relocation entries.  The value of
542  * symbol1 is stored in the first relocation entry's r_value field and the
543  * value of symbol2 is stored in the pair's r_value field.
544  *
545  * A special case for a prebound lazy pointer is needed to be able to set the
546  * value of the lazy pointer back to its non-prebound state.  This is done
547  * using the GENERIC_RELOC_PB_LA_PTR r_type.  This is a scattered relocation
548  * entry where the r_value field is the value of the lazy pointer not prebound. */
549 
550 /* My interpretation (A Fog):
551    32-bit: Objects are not addressed by their offset into the section but by
552    their "absolute" address. This "absolute" address has no reality.
553    It is the address that the object would have if the section was placed
554    at the address specified in the addr field of the section header.
555    Scattered:
556    The first record, of type MAC32_RELOC_SECTDIFF or MAC32_RELOC_LOCAL_SECTDIFF
557    contains the "absolute" address of a first reference point, let's call it ref1,
558    in the r_value field. The second record, of type MAC32_RELOC_PAIR contains the
559    "absolute" address of a second reference point, ref2, in the r_value field.
560    The inline value is the "absolute" address of the relocation target minus ref2.
561    ref1 is often = target, but may be any label preceding the target. The linker
562    has to add (ref1 - ref2) in image minus (ref1 - ref2) in object file to the
563    inline value. The relocation source (the position of the inline field) is
564    given in r_address in the first record, relative the the section.
565    Non-scattered, absolute, r_extern = 1:
566    r_symbolnum = symbol index (0-based)
567    Non-scattered, absolute, r_extern = 0:
568    r_symbolnum = section index, inline = absolute address of target?
569    Non-scattered, r_pcrel = 1, r_extern = 1:
570    r_symbolnum = symbol index (0-based)
571    Inline = source absolute address - 4
572    Non-scattered, r_pcrel = 1, r_extern = 0:
573    r_symbolnum = section index,
574    inline = absolute address of target - absolute address of source - 4
575 */
576 
577 #define MAC32_RELOC_VANILLA        0   // A generic relocation entry for both addresses contained in data
578                                        // and addresses contained in CPU instructions.
579 #define MAC32_RELOC_PAIR           1   // The second relocation entry of a pair. Only follows a GENERIC_RELOC_SECTDIFF
580 #define MAC32_RELOC_SECTDIFF       2   // A relocation entry for an item that contains the difference of
581                                        // two section addresses. This is generally used for position-independent code generation.
582 #define MAC32_RELOC_PB_LA_PTR      3   // �Arelocation entry for a prebound lazy pointer. This is always
583                                        // a scattered relocation entry. The r_value field contains the non-prebound value of the lazy pointer.
584 #define MAC32_RELOC_LOCAL_SECTDIFF 4   // SECTDIFF�Similar to GENERIC_RELOC_SECTDIFF except that this entry refers specifically to the address in this item.
585                                        // If the address is that of a globally visible coalesced symbol, this relocation entry does not change if the symbol is overridden.
586                                        // This is used to associate stack unwinding information with the object code this relocation entry describes.
587 
588 // 64-bit relocation types:
589 // Scattered relocations are not used in 64-bit Mach-O.
590 // reloc.h says that references to local symbols are made by the nearest
591 // preceding public symbol + displacement, but my experiments show that
592 // local symbol records are used, which of course is easier.
593 // r_extern = 1 is used even for non-external symbols!
594 // The target address is not stored inline. The -4 offset for self-relative
595 // addresses is implicit, unlike in 32-bit Mach-O. If the difference
596 // between source address and instruction pointer is e.g. -5, then the
597 // -4 is implicit, and the -1 is explicit!
598 
599 #define MAC64_RELOC_UNSIGNED       0   // absolute address, 32 or 64 bits
600 #define MAC64_RELOC_SIGNED         1   // signed 32-bit displacement with implicit -4 addend
601 #define MAC64_RELOC_BRANCH         2   // same, used for CALL and JMP instructions
602 #define MAC64_RELOC_GOT_LOAD       3   // self-relative load of a GOT entry
603 #define MAC64_RELOC_GOT            4   // other GOT references
604 #define MAC64_RELOC_SUBTRACTOR     5   // must be followed by a X86_64_RELOC_UNSIGNED
605 #define MAC64_RELOC_SIGNED_1       6   // signed 32-bit displacement with implicit -4 addend and explicit -1 addend
606 #define MAC64_RELOC_SIGNED_2       7   // signed 32-bit displacement with implicit -4 addend and explicit -2 addend
607 #define MAC64_RELOC_SIGNED_4       8   // signed 32-bit displacement with implicit -4 addend and explicit -4 addend
608 
609 
610 // Symbol table entries
611 /* Format of a symbol table entry of a Mach-O file.  Modified from the BSD
612  * format.  The modifications from the original format were changing n_other
613  * (an unused field) to n_sect and the addition of the N_SECT type.  These
614  * modifications are required to support symbols in an arbitrary number of
615  * sections not just the three sections (text, data and bss) in a BSD file. */
616 
617 struct MAC_nlist_32 {
618    uint32_t  n_strx;   // index into the string table
619    uint8_t   n_type;   // type flag, see below
620    uint8_t   n_sect;   // section number or NO_SECT
621    int16_t   n_desc;   // see <mach-o/stab.h>
622    uint32_t  n_value;  // value of this symbol (or stab offset)
623 };
624 
625 struct MAC_nlist_64 {
626    uint32_t  n_strx;   // index into the string table
627    uint8_t   n_type;   // type flag, see below
628    uint8_t   n_sect;   // section number or NO_SECT
629    int16_t   n_desc;   // see <mach-o/stab.h>
630    uint64_t  n_value;  // value of this symbol (or stab offset)
631 };
632 
633 /* Symbols with a index into the string table of zero are
634  * defined to have a null, "", name.  */
635 
636 /* The n_type field really contains three fields:
637 *      unsigned char N_STAB:3,
638 *                    N_PEXT:1,
639 *                    N_TYPE:3,
640 *                    N_EXT:1;
641 * which are used via the following masks. */
642 
643 #define MAC_N_STAB  0xe0  /* if any of these bits set, a symbolic debugging entry */
644 #define MAC_N_PEXT  0x10  /* private external symbol bit */
645 #define MAC_N_TYPE  0x0e  /* mask for the type bits */
646 #define MAC_N_EXT   0x01  /* external symbol bit, set for external symbols */
647 
648 /* Only symbolic debugging entries have some of the N_STAB bits set and if any
649  * of these bits are set then it is a symbolic debugging entry (a stab).  In
650  * which case then the values of the n_type field (the entire field) are given
651  * in <mach-o/stab.h> */
652 
653 // Values for N_TYPE bits of the n_type field.
654 #define MAC_N_UNDF  0x0   // undefined, n_sect == NO_SECT
655 #define MAC_N_ABS   0x2   // absolute, n_sect == NO_SECT
656 #define MAC_N_SECT  0xe   // defined in section number n_sect
657 #define MAC_N_PBUD  0xc   // prebound undefined (defined in a dylib)
658 #define MAC_N_INDR  0xa   // indirect
659 
660 /* If the type is MAC_N_INDR then the symbol is defined to be the same as another
661  * symbol.  In this case the n_value field is an index into the string table
662  * of the other symbol's name.  When the other symbol is defined then they both
663  * take on the defined type and value. */
664 
665 /* If the type is MAC_N_SECT then the n_sect field contains an ordinal of the
666  * section the symbol is defined in.  The sections are numbered from 1 and
667  * refer to sections in order they appear in the load commands for the file
668  * they are in.  This means the same ordinal may very well refer to different
669  * sections in different files.
670  *
671  * The n_value field for all symbol table entries (including N_STAB's) gets
672  * updated by the link editor based on the value of it's n_sect field and where
673  * the section n_sect references gets relocated.  If the value of the n_sect
674  * field is NO_SECT then it's n_value field is not changed by the link editor. */
675 
676 #define MAC_NO_SECT         0       // symbol is not in any section
677 #define MAC_MAX_SECT        255     // 1 thru 255 inclusive
678 
679 /* Common symbols are represented by undefined (N_UNDF) external (N_EXT) types
680  * who's values (n_value) are non-zero.  In which case the value of the n_value
681  * field is the size (in bytes) of the common symbol.  The n_sect field is set
682  * to NO_SECT. */
683 
684 /* To support the lazy binding of undefined symbols in the dynamic link-editor,
685  * the undefined symbols in the symbol table (the nlist structures) are marked
686  * with the indication if the undefined reference is a lazy reference or
687  * non-lazy reference.  If both a non-lazy reference and a lazy reference is
688  * made to the same symbol the non-lazy reference takes precedence.  A reference
689  * is lazy only when all references to that symbol are made through a symbol
690  * pointer in a lazy symbol pointer section.
691  *
692  * The implementation of marking nlist structures in the symbol table for
693  * undefined symbols will be to use some of the bits of the n_desc field as a
694  * reference type.  The mask REFERENCE_TYPE will be applied to the n_desc field
695  * of an nlist structure for an undefined symbol to determine the type of
696  * undefined reference (lazy or non-lazy).
697  *
698  * The constants for the REFERENCE FLAGS are propagated to the reference table
699  * in a shared library file.  In that case the constant for a defined symbol,
700  * REFERENCE_FLAG_DEFINED, is also used. */
701 
702 /* Reference type bits of the n_desc field of undefined symbols */
703 #define MAC_REF_TYPE                                  0xf
704 /* types of references */
705 #define MAC_REF_FLAG_UNDEFINED_NON_LAZY               0
706 #define MAC_REF_FLAG_UNDEFINED_LAZY                   1
707 #define MAC_REF_FLAG_DEFINED                          2
708 #define MAC_REF_FLAG_PRIVATE_DEFINED                  3
709 #define MAC_REF_FLAG_PRIVATE_UNDEFINED_NON_LAZY       4
710 #define MAC_REF_FLAG_PRIVATE_UNDEFINED_LAZY           5
711 
712 /* To simplify stripping of objects that use are used with the dynamic link
713  * editor, the static link editor marks the symbols defined an object that are
714  * referenced by a dynamicly bound object (dynamic shared libraries, bundles).
715  * With this marking strip knows not to strip these symbols. */
716 
717 /* The non-reference type bits of the n_desc field for global symbols are
718  * reserved for the dynamic link editor.  All of these bits must start out
719  * zero in the object file. */
720 
721 
722 // Additional n_desc flags
723 #define MAC_REFERENCED_DYNAMICALLY 0x10  // Must be set for any defined symbol that is referenced by dynamic-loader APIs (such as dlsym and NSLookupSymbolInImage) and not ordinary
724                                          // undefined symbol references. The strip tool uses this bit to avoid removing symbols that must exist: If the symbol has this bit set, strip does not strip it.
725 
726 #define MAC_N_DESC_DISCARDED       0x20  // Sometimes used by the dynamic linker at runtime in a fully linked image. Do not set this bit in a fully linked image.
727 //#define MAC_N_DESC_DISCARDED 0x8000
728 
729 #define MAC_N_NO_DEAD_STRIP        0x20  // When set in a relocatable object file (file type MH_OBJECT) on a defined symbol,
730                                          // indicates to the static linker to never dead-strip the symbol. (Note that the same bit (0x20) is used for two nonoverlapping purposes.)
731 
732 #define MAC_N_WEAK_REF             0x40  // Indicates that this undefined symbol is aweak reference. If the dynamic linker cannot find a definition
733                                          // for this symbol, it sets the address of this symbol to 0. The static linker sets this symbol given the appropriate weak-linking flags.
734 
735 #define MAC_N_WEAK_DEF             0x80  // Indicates that this symbol is a weak definition. If the static linker or the dynamic linker finds another
736                                          // (non-weak) definition for this symbol, theweak definition is ignored. Only symbols in a coalesced section (page 21) can be marked as a weak definition.
737 
738 // Data structure used when sorting symbol table for Mach-O file in MacSymbolTableBuilder
739 template <class TMAC_nlist>
740 struct MacSymbolRecord : public TMAC_nlist {
741    uint32_t Name;                        // Index into MacSymbolTableBuilder::StringBuffer
742    int OldIndex;                       // Old symbol index
743 };
744 
745 // Class for building and storing symbol table, sorted or unsorted
746 template <class TMAC_nlist, class MInt>
747 class MacSymbolTableBuilder : public CMemoryBuffer {
748    int sorted;                                   // Remember if list is sorted
749    CMemoryBuffer StringBuffer;                   // Temporary storage of symbol names
750 public:
751    MacSymbolTableBuilder();                      // Constructor
752    void AddSymbol(int OldIndex, const char * name, int type, int Desc, int section, MInt value); // Add symbol to list
753    void SortList();                              // Sort the list
754    int TranslateIndex(int OldIndex);             // Translate old index to new index, after sorting
755    void StoreList(CMemoryBuffer * SymbolTable, CMemoryBuffer * StringTable); // Store sorted list in buffers
756    int Search(const char * name);                // Search for name. -1 if not found
757    MacSymbolRecord<TMAC_nlist> & operator[] (uint32_t i);      // Access member
758 };
759 
760 // structures for MacIntosh universal binaries
761 struct MAC_UNIV_FAT_HEADER {           // File header for universal binary
762    uint32_t magic;                       // Magic number 0xCAFEBABE, big endian
763    uint32_t num_arch;                    // Number of members, big endian
764 };
765 
766 struct MAC_UNIV_FAT_ARCH {             // Member pointer
767    uint32_t cputype;                     // cpu type
768    uint32_t cpusubtype;                  // cpu subtype
769    uint32_t offset;                      // file offset of member
770    uint32_t size;                        // size of member
771    uint32_t align;                       // alignment in file = 2^align
772 };
773 
774 // Structure used for list of sections that have relocations during disassembly
775 struct MAC_SECT_WITH_RELOC {
776    int32_t  Section;                     // Section index
777    uint32_t SectOffset;                  // File offset of section binary data
778    uint32_t NumReloc;                    // Number of relocations records for this section
779    uint32_t ReltabOffset;                // File offset of relocation table for this section
780 };
781 
782 /********************** Strings **********************/
783 #define MAC_CONSTRUCTOR_NAME    "__mod_init_func"  // Name of constructors section
784 
785 
786 // Macros listing all word-size dependent structures, used as template parameter list
787 #define MACSTRUCTURES    TMAC_header,   TMAC_segment_command,   TMAC_section,   TMAC_nlist, MInt
788 #define MAC32STRUCTURES  MAC_header_32, MAC_segment_command_32, MAC_section_32, MAC_nlist_32, int32_t
789 #define MAC64STRUCTURES  MAC_header_64, MAC_segment_command_64, MAC_section_64, MAC_nlist_64, int64_t
790 
791 #endif // #ifndef MACHO_H
792