1 //! The Mach-o, mostly zero-copy, binary format parser and raw struct definitions
2 use core::fmt;
3 use crate::alloc::vec::Vec;
4 
5 use log::debug;
6 
7 use scroll::{Pread, BE};
8 use scroll::ctx::SizeWith;
9 
10 use crate::error;
11 use crate::container;
12 
13 pub mod header;
14 pub mod constants;
15 pub mod fat;
16 pub mod load_command;
17 pub mod symbols;
18 pub mod exports;
19 pub mod imports;
20 pub mod bind_opcodes;
21 pub mod relocation;
22 pub mod segment;
23 
24 pub use self::constants::cputype as cputype;
25 
26 /// Returns a big endian magical number
peek(bytes: &[u8], offset: usize) -> error::Result<u32>27 pub fn peek(bytes: &[u8], offset: usize) -> error::Result<u32> {
28     Ok(bytes.pread_with::<u32>(offset, scroll::BE)?)
29 }
30 
31 /// Parses a magic number, and an accompanying mach-o binary parsing context, according to the magic number.
parse_magic_and_ctx(bytes: &[u8], offset: usize) -> error::Result<(u32, Option<container::Ctx>)>32 pub fn parse_magic_and_ctx(bytes: &[u8], offset: usize) -> error::Result<(u32, Option<container::Ctx>)> {
33     use crate::mach::header::*;
34     use crate::container::Container;
35     let magic = bytes.pread_with::<u32>(offset, BE)?;
36     let ctx = match magic {
37         MH_CIGAM_64 | MH_CIGAM | MH_MAGIC_64 | MH_MAGIC => {
38             let is_lsb = magic == MH_CIGAM || magic == MH_CIGAM_64;
39             let le = scroll::Endian::from(is_lsb);
40             let container = if magic == MH_MAGIC_64 || magic == MH_CIGAM_64 { Container::Big } else { Container::Little };
41             Some(container::Ctx::new(container, le))
42         },
43         _ => None,
44     };
45     Ok((magic, ctx))
46 }
47 
48 /// A cross-platform, zero-copy, endian-aware, 32/64 bit Mach-o binary parser
49 pub struct MachO<'a> {
50     /// The mach-o header
51     pub header: header::Header,
52     /// The load commands tell the kernel and dynamic linker how to use/interpret this binary
53     pub load_commands: Vec<load_command::LoadCommand>,
54     /// The load command "segments" - typically the pieces of the binary that are loaded into memory
55     pub segments: segment::Segments<'a>,
56     /// The "Nlist" style symbols in this binary - strippable
57     pub symbols: Option<symbols::Symbols<'a>>,
58     /// The dylibs this library depends on
59     pub libs: Vec<&'a str>,
60     /// The entry point (as a virtual memory address), 0 if none
61     pub entry: u64,
62     /// Whether `entry` refers to an older `LC_UNIXTHREAD` instead of the newer `LC_MAIN` entrypoint
63     pub old_style_entry: bool,
64     /// The name of the dylib, if any
65     pub name: Option<&'a str>,
66     /// Are we a little-endian binary?
67     pub little_endian: bool,
68     /// Are we a 64-bit binary
69     pub is_64: bool,
70     data: &'a [u8],
71     ctx: container::Ctx,
72     export_trie: Option<exports::ExportTrie<'a>>,
73     bind_interpreter: Option<imports::BindInterpreter<'a>>,
74 }
75 
76 impl<'a> fmt::Debug for MachO<'a> {
fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result77     fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
78         fmt.debug_struct("MachO")
79             .field("header",          &self.header)
80             .field("load_commands",   &self.load_commands)
81             .field("segments",        &self.segments)
82             .field("entry",           &self.entry)
83             .field("old_style_entry", &self.old_style_entry)
84             .field("libs",            &self.libs)
85             .field("name",            &self.name)
86             .field("little_endian",   &self.little_endian)
87             .field("is_64",           &self.is_64)
88             .field("symbols()",       &self.symbols().collect::<Vec<_>>())
89             .field("exports()",       &self.exports())
90             .field("imports()",       &self.imports())
91             .finish()
92     }
93 }
94 
95 impl<'a> MachO<'a> {
96     /// Is this a relocatable object file?
is_object_file(&self) -> bool97     pub fn is_object_file(&self) -> bool {
98         self.header.filetype == header::MH_OBJECT
99     }
100     /// Return an iterator over all the symbols in this binary
symbols(&self) -> symbols::SymbolIterator<'a>101     pub fn symbols(&self) -> symbols::SymbolIterator<'a> {
102         if let Some(ref symbols) = self.symbols {
103             symbols.into_iter()
104         } else {
105             symbols::SymbolIterator::default()
106         }
107     }
108     /// Return a vector of the relocations in this binary
relocations(&self) -> error::Result<Vec<(usize, segment::RelocationIterator, segment::Section)>>109     pub fn relocations(&self) -> error::Result<Vec<(usize, segment::RelocationIterator, segment::Section)>> {
110         debug!("Iterating relocations");
111         let mut relocs = Vec::new();
112         for (_i, segment) in (&self.segments).into_iter().enumerate() {
113             for (j, section) in segment.into_iter().enumerate() {
114                 let (section, _data) = section?;
115                 if section.nreloc > 0 {
116                     relocs.push((j, section.iter_relocations(self.data, self.ctx), section));
117                 }
118             }
119         }
120         Ok(relocs)
121     }
122     /// Return the exported symbols in this binary (if any)
exports(&self) -> error::Result<Vec<exports::Export>>123     pub fn exports(&self) -> error::Result<Vec<exports::Export>> {
124         if let Some(ref trie) = self.export_trie {
125             trie.exports(self.libs.as_slice())
126         } else {
127             Ok(vec![])
128         }
129     }
130     /// Return the imported symbols in this binary that dyld knows about (if any)
imports(&self) -> error::Result<Vec<imports::Import>>131     pub fn imports(&self) -> error::Result<Vec<imports::Import>> {
132         if let Some(ref interpreter) = self.bind_interpreter {
133             interpreter.imports(self.libs.as_slice(), self.segments.as_slice(), self.ctx)
134         } else {
135             Ok(vec![])
136         }
137     }
138     /// Parses the Mach-o binary from `bytes` at `offset`
parse(bytes: &'a [u8], mut offset: usize) -> error::Result<MachO<'a>>139     pub fn parse(bytes: &'a [u8], mut offset: usize) -> error::Result<MachO<'a>> {
140         let (magic, maybe_ctx) = parse_magic_and_ctx(bytes, offset)?;
141         let ctx = if let Some(ctx) = maybe_ctx { ctx } else { return Err(error::Error::BadMagic(u64::from(magic))) };
142         debug!("Ctx: {:?}", ctx);
143         let offset = &mut offset;
144         let header: header::Header = bytes.pread_with(*offset, ctx)?;
145         debug!("Mach-o header: {:?}", header);
146         let little_endian = ctx.le.is_little();
147         let is_64 = ctx.container.is_big();
148         *offset += header::Header::size_with(&ctx.container);
149         let ncmds = header.ncmds;
150         let mut cmds: Vec<load_command::LoadCommand> = Vec::with_capacity(ncmds);
151         let mut symbols = None;
152         let mut libs = vec!["self"];
153         let mut export_trie = None;
154         let mut bind_interpreter = None;
155         let mut unixthread_entry_address = None;
156         let mut main_entry_offset = None;
157         let mut name = None;
158         let mut segments = segment::Segments::new(ctx);
159         for i in 0..ncmds {
160             let cmd = load_command::LoadCommand::parse(bytes, offset, ctx.le)?;
161             debug!("{} - {:?}", i, cmd);
162             match cmd.command {
163                 load_command::CommandVariant::Segment32(command) => {
164                     // FIXME: we may want to be less strict about failure here, and just return an empty segment to allow parsing to continue?
165                     segments.push(segment::Segment::from_32(bytes, &command, cmd.offset, ctx)?)
166                 },
167                 load_command::CommandVariant::Segment64(command) => {
168                     segments.push(segment::Segment::from_64(bytes, &command, cmd.offset, ctx)?)
169                 },
170                 load_command::CommandVariant::Symtab(command) => {
171                     symbols = Some(symbols::Symbols::parse(bytes, &command, ctx)?);
172                 },
173                   load_command::CommandVariant::LoadDylib      (command)
174                 | load_command::CommandVariant::LoadUpwardDylib(command)
175                 | load_command::CommandVariant::ReexportDylib  (command)
176                 | load_command::CommandVariant::LoadWeakDylib  (command)
177                 | load_command::CommandVariant::LazyLoadDylib  (command) => {
178                     let lib = bytes.pread::<&str>(cmd.offset + command.dylib.name as usize)?;
179                     libs.push(lib);
180                 },
181                   load_command::CommandVariant::DyldInfo    (command)
182                 | load_command::CommandVariant::DyldInfoOnly(command) => {
183                     export_trie = Some(exports::ExportTrie::new(bytes, &command));
184                     bind_interpreter = Some(imports::BindInterpreter::new(bytes, &command));
185                 },
186                 load_command::CommandVariant::Unixthread(command) => {
187                     // dyld cares only about the first LC_UNIXTHREAD
188                     if unixthread_entry_address.is_none() {
189                         unixthread_entry_address = Some(command.instruction_pointer(header.cputype)?);
190                     }
191                 },
192                 load_command::CommandVariant::Main(command) => {
193                     // dyld cares only about the first LC_MAIN
194                     if main_entry_offset.is_none() {
195                         main_entry_offset = Some(command.entryoff);
196                     }
197                 },
198                 load_command::CommandVariant::IdDylib(command) => {
199                     let id = bytes.pread::<&str>(cmd.offset + command.dylib.name as usize)?;
200                     libs[0] = id;
201                     name = Some(id);
202                 },
203                 _ => ()
204             }
205             cmds.push(cmd)
206         }
207 
208         // dyld prefers LC_MAIN over LC_UNIXTHREAD
209         // choose the same way here
210         let (entry, old_style_entry) = if let Some(offset) = main_entry_offset {
211             // map the entrypoint offset to a virtual memory address
212             let base_address = segments.iter()
213                 .filter(|s| &s.segname[0..7] == b"__TEXT\0")
214                 .map(|s| s.vmaddr - s.fileoff)
215                 .next()
216                 .ok_or_else(||
217                     error::Error::Malformed(format!("image specifies LC_MAIN offset {} but has no __TEXT segment", offset))
218                 )?;
219 
220             (base_address + offset, false)
221         } else if let Some(address) = unixthread_entry_address {
222             (address, true)
223         } else {
224             (0, false)
225         };
226 
227         Ok(MachO {
228             header,
229             load_commands: cmds,
230             segments,
231             symbols,
232             libs,
233             export_trie,
234             bind_interpreter,
235             entry,
236             old_style_entry,
237             name,
238             ctx,
239             is_64,
240             little_endian,
241             data: bytes,
242         })
243     }
244 }
245 
/// A Mach-o multi architecture (Fat) binary container.
///
/// Nothing is parsed eagerly beyond the fat header; the per-architecture headers and
/// binaries are read on demand from `data`.
pub struct MultiArch<'a> {
    // The bytes of the whole container.
    data: &'a [u8],
    // Offset in `data` of the first fat arch header.
    start: usize,
    /// The number of architectures the fat header declares.
    pub narches: usize,
}
252 
253 /// Iterator over the fat architecture headers in a `MultiArch` container
254 pub struct FatArchIterator<'a> {
255     index: usize,
256     data: &'a[u8],
257     narches: usize,
258     start: usize,
259 }
260 
261 impl<'a> Iterator for FatArchIterator<'a> {
262     type Item = error::Result<fat::FatArch>;
next(&mut self) -> Option<Self::Item>263     fn next(&mut self) -> Option<Self::Item> {
264         if self.index >= self.narches {
265             None
266         } else {
267             let offset = (self.index * fat::SIZEOF_FAT_ARCH) + self.start;
268             let arch = self.data.pread_with::<fat::FatArch>(offset, scroll::BE).map_err(core::convert::Into::into);
269             self.index += 1;
270             Some(arch)
271         }
272     }
273 }
274 
275 /// Iterator over every `MachO` binary contained in this `MultiArch` container
276 pub struct MachOIterator<'a> {
277     index: usize,
278     data: &'a[u8],
279     narches: usize,
280     start: usize,
281 }
282 
283 impl<'a> Iterator for MachOIterator<'a> {
284     type Item = error::Result<MachO<'a>>;
next(&mut self) -> Option<Self::Item>285     fn next(&mut self) -> Option<Self::Item> {
286         if self.index >= self.narches {
287             None
288         } else {
289             let index = self.index;
290             let offset = (index * fat::SIZEOF_FAT_ARCH) + self.start;
291             self.index += 1;
292             match self.data.pread_with::<fat::FatArch>(offset, scroll::BE) {
293                 Ok(arch) => {
294                     let bytes = arch.slice(self.data);
295                     let binary = MachO::parse(bytes, 0);
296                     Some(binary)
297                 },
298                 Err(e) => Some(Err(e.into()))
299             }
300         }
301     }
302 }
303 
304 impl<'a, 'b> IntoIterator for &'b MultiArch<'a> {
305     type Item = error::Result<MachO<'a>>;
306     type IntoIter = MachOIterator<'a>;
into_iter(self) -> Self::IntoIter307     fn into_iter(self) -> Self::IntoIter {
308         MachOIterator {
309             index: 0,
310             data: self.data,
311             narches: self.narches,
312             start: self.start,
313         }
314     }
315 }
316 
317 impl<'a> MultiArch<'a> {
318     /// Lazily construct `Self`
new(bytes: &'a [u8]) -> error::Result<Self>319     pub fn new(bytes: &'a [u8]) -> error::Result<Self> {
320         let header = fat::FatHeader::parse(bytes)?;
321         Ok(MultiArch {
322             data: bytes,
323             start: fat::SIZEOF_FAT_HEADER,
324             narches: header.nfat_arch as usize
325         })
326     }
327     /// Iterate every fat arch header
iter_arches(&self) -> FatArchIterator328     pub fn iter_arches(&self) -> FatArchIterator {
329         FatArchIterator {
330             index: 0,
331             data: self.data,
332             narches: self.narches,
333             start: self.start,
334         }
335     }
336     /// Return all the architectures in this binary
arches(&self) -> error::Result<Vec<fat::FatArch>>337     pub fn arches(&self) -> error::Result<Vec<fat::FatArch>> {
338         let mut arches = Vec::with_capacity(self.narches);
339         for arch in self.iter_arches() {
340             arches.push(arch?);
341         }
342         Ok(arches)
343     }
344     /// Try to get the Mach-o binary at `index`
get(&self, index: usize) -> error::Result<MachO<'a>>345     pub fn get(&self, index: usize) -> error::Result<MachO<'a>> {
346         if index >= self.narches {
347             return Err(error::Error::Malformed(format!("Requested the {}-th binary, but there are only {} architectures in this container", index, self.narches)))
348         }
349         let offset = (index * fat::SIZEOF_FAT_ARCH) + self.start;
350         let arch = self.data.pread_with::<fat::FatArch>(offset, scroll::BE)?;
351         let bytes = arch.slice(self.data);
352         Ok(MachO::parse(bytes, 0)?)
353     }
354 
find<F: Fn(error::Result<fat::FatArch>) -> bool>(&'a self, f: F) -> Option<error::Result<MachO<'a>>>355     pub fn find<F: Fn(error::Result<fat::FatArch>) -> bool>(&'a self, f: F) -> Option<error::Result<MachO<'a>>> {
356         for (i, arch) in self.iter_arches().enumerate() {
357             if f(arch) {
358                 return Some(self.get(i));
359             }
360         }
361         None
362     }
363     /// Try and find the `cputype` in `Self`, if there is one
find_cputype(&self, cputype: u32) -> error::Result<Option<fat::FatArch>>364     pub fn find_cputype(&self, cputype: u32) -> error::Result<Option<fat::FatArch>> {
365         for arch in self.iter_arches() {
366             let arch = arch?;
367             if arch.cputype == cputype { return Ok(Some(arch)) }
368         }
369         Ok(None)
370     }
371 }
372 
373 impl<'a> fmt::Debug for MultiArch<'a> {
fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result374     fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
375         fmt.debug_struct("MultiArch")
376             .field("arches",  &self.arches().unwrap())
377             .field("data",    &self.data.len())
378             .finish()
379     }
380 }
381 
382 #[derive(Debug)]
383 #[allow(clippy::large_enum_variant)]
384 /// Either a collection of multiple architectures, or a single mach-o binary
385 pub enum Mach<'a> {
386     /// A "fat" multi-architecture binary container
387     Fat(MultiArch<'a>),
388     /// A regular Mach-o binary
389     Binary(MachO<'a>)
390 }
391 
392 impl<'a> Mach<'a> {
393     /// Parse from `bytes` either a multi-arch binary or a regular mach-o binary
parse(bytes: &'a [u8]) -> error::Result<Self>394     pub fn parse(bytes: &'a [u8]) -> error::Result<Self> {
395         let size = bytes.len();
396         if size < 4 {
397             let error = error::Error::Malformed("size is smaller than a magical number".into());
398             return Err(error);
399         }
400         let magic = peek(&bytes, 0)?;
401         match magic {
402             fat::FAT_MAGIC => {
403                 let multi = MultiArch::new(bytes)?;
404                 Ok(Mach::Fat(multi))
405             },
406             // we might be a regular binary
407             _ => {
408                 let binary = MachO::parse(bytes, 0)?;
409                 Ok(Mach::Binary(binary))
410             }
411         }
412     }
413 }
414