1 //! The Mach-o, mostly zero-copy, binary format parser and raw struct definitions
2 use alloc::vec::Vec;
3 use core::fmt;
4 
5 use log::debug;
6 
7 use scroll::ctx::SizeWith;
8 use scroll::{Pread, BE};
9 
10 use crate::container;
11 use crate::error;
12 
13 pub mod bind_opcodes;
14 pub mod constants;
15 pub mod exports;
16 pub mod fat;
17 pub mod header;
18 pub mod imports;
19 pub mod load_command;
20 pub mod relocation;
21 pub mod segment;
22 pub mod symbols;
23 
24 pub use self::constants::cputype;
25 
26 /// Returns a big endian magical number
peek(bytes: &[u8], offset: usize) -> error::Result<u32>27 pub fn peek(bytes: &[u8], offset: usize) -> error::Result<u32> {
28     Ok(bytes.pread_with::<u32>(offset, scroll::BE)?)
29 }
30 
31 /// Parses a magic number, and an accompanying mach-o binary parsing context, according to the magic number.
parse_magic_and_ctx( bytes: &[u8], offset: usize, ) -> error::Result<(u32, Option<container::Ctx>)>32 pub fn parse_magic_and_ctx(
33     bytes: &[u8],
34     offset: usize,
35 ) -> error::Result<(u32, Option<container::Ctx>)> {
36     use crate::container::Container;
37     use crate::mach::header::*;
38     let magic = bytes.pread_with::<u32>(offset, BE)?;
39     let ctx = match magic {
40         MH_CIGAM_64 | MH_CIGAM | MH_MAGIC_64 | MH_MAGIC => {
41             let is_lsb = magic == MH_CIGAM || magic == MH_CIGAM_64;
42             let le = scroll::Endian::from(is_lsb);
43             let container = if magic == MH_MAGIC_64 || magic == MH_CIGAM_64 {
44                 Container::Big
45             } else {
46                 Container::Little
47             };
48             Some(container::Ctx::new(container, le))
49         }
50         _ => None,
51     };
52     Ok((magic, ctx))
53 }
54 
/// A cross-platform, zero-copy, endian-aware, 32/64 bit Mach-o binary parser
pub struct MachO<'a> {
    /// The mach-o header
    pub header: header::Header,
    /// The load commands tell the kernel and dynamic linker how to use/interpret this binary
    pub load_commands: Vec<load_command::LoadCommand>,
    /// The load command "segments" - typically the pieces of the binary that are loaded into memory
    pub segments: segment::Segments<'a>,
    /// The "Nlist" style symbols in this binary - strippable
    pub symbols: Option<symbols::Symbols<'a>>,
    /// The dylibs this library depends on.
    ///
    /// Index 0 always refers to this binary itself: it is the dylib id name when an id
    /// load command was present, and the placeholder `"self"` otherwise.
    pub libs: Vec<&'a str>,
    /// The runtime search paths for dylibs this library depends on
    pub rpaths: Vec<&'a str>,
    /// The entry point (as a virtual memory address), 0 if none
    pub entry: u64,
    /// Whether `entry` refers to an older `LC_UNIXTHREAD` instead of the newer `LC_MAIN` entrypoint
    pub old_style_entry: bool,
    /// The name of the dylib, if any
    pub name: Option<&'a str>,
    /// Are we a little-endian binary?
    pub little_endian: bool,
    /// Are we a 64-bit binary
    pub is_64: bool,
    /// The raw bytes this binary was parsed from
    data: &'a [u8],
    /// The container/endianness context that was derived from the magic number
    ctx: container::Ctx,
    /// Export trie built from the dyld info load command, if one was present; backs `exports()`
    export_trie: Option<exports::ExportTrie<'a>>,
    /// Bind interpreter built from the dyld info load command, if one was present; backs `imports()`
    bind_interpreter: Option<imports::BindInterpreter<'a>>,
}
84 
85 impl<'a> fmt::Debug for MachO<'a> {
fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result86     fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
87         fmt.debug_struct("MachO")
88             .field("header", &self.header)
89             .field("load_commands", &self.load_commands)
90             .field("segments", &self.segments)
91             .field("entry", &self.entry)
92             .field("old_style_entry", &self.old_style_entry)
93             .field("libs", &self.libs)
94             .field("name", &self.name)
95             .field("little_endian", &self.little_endian)
96             .field("is_64", &self.is_64)
97             .field("symbols()", &self.symbols().collect::<Vec<_>>())
98             .field("exports()", &self.exports())
99             .field("imports()", &self.imports())
100             .finish()
101     }
102 }
103 
104 impl<'a> MachO<'a> {
105     /// Is this a relocatable object file?
is_object_file(&self) -> bool106     pub fn is_object_file(&self) -> bool {
107         self.header.filetype == header::MH_OBJECT
108     }
109     /// Return an iterator over all the symbols in this binary
symbols(&self) -> symbols::SymbolIterator<'a>110     pub fn symbols(&self) -> symbols::SymbolIterator<'a> {
111         if let Some(ref symbols) = self.symbols {
112             symbols.into_iter()
113         } else {
114             symbols::SymbolIterator::default()
115         }
116     }
117     /// Return a vector of the relocations in this binary
relocations( &self, ) -> error::Result<Vec<(usize, segment::RelocationIterator, segment::Section)>>118     pub fn relocations(
119         &self,
120     ) -> error::Result<Vec<(usize, segment::RelocationIterator, segment::Section)>> {
121         debug!("Iterating relocations");
122         let mut relocs = Vec::new();
123         for (_i, segment) in (&self.segments).into_iter().enumerate() {
124             for (j, section) in segment.into_iter().enumerate() {
125                 let (section, _data) = section?;
126                 if section.nreloc > 0 {
127                     relocs.push((j, section.iter_relocations(self.data, self.ctx), section));
128                 }
129             }
130         }
131         Ok(relocs)
132     }
133     /// Return the exported symbols in this binary (if any)
exports(&self) -> error::Result<Vec<exports::Export>>134     pub fn exports(&self) -> error::Result<Vec<exports::Export>> {
135         if let Some(ref trie) = self.export_trie {
136             trie.exports(self.libs.as_slice())
137         } else {
138             Ok(vec![])
139         }
140     }
141     /// Return the imported symbols in this binary that dyld knows about (if any)
imports(&self) -> error::Result<Vec<imports::Import>>142     pub fn imports(&self) -> error::Result<Vec<imports::Import>> {
143         if let Some(ref interpreter) = self.bind_interpreter {
144             interpreter.imports(self.libs.as_slice(), self.segments.as_slice(), self.ctx)
145         } else {
146             Ok(vec![])
147         }
148     }
149     /// Parses the Mach-o binary from `bytes` at `offset`
parse(bytes: &'a [u8], mut offset: usize) -> error::Result<MachO<'a>>150     pub fn parse(bytes: &'a [u8], mut offset: usize) -> error::Result<MachO<'a>> {
151         let (magic, maybe_ctx) = parse_magic_and_ctx(bytes, offset)?;
152         let ctx = if let Some(ctx) = maybe_ctx {
153             ctx
154         } else {
155             return Err(error::Error::BadMagic(u64::from(magic)));
156         };
157         debug!("Ctx: {:?}", ctx);
158         let offset = &mut offset;
159         let header: header::Header = bytes.pread_with(*offset, ctx)?;
160         debug!("Mach-o header: {:?}", header);
161         let little_endian = ctx.le.is_little();
162         let is_64 = ctx.container.is_big();
163         *offset += header::Header::size_with(&ctx.container);
164         let ncmds = header.ncmds;
165         let mut cmds: Vec<load_command::LoadCommand> = Vec::with_capacity(ncmds);
166         let mut symbols = None;
167         let mut libs = vec!["self"];
168         let mut rpaths = vec![];
169         let mut export_trie = None;
170         let mut bind_interpreter = None;
171         let mut unixthread_entry_address = None;
172         let mut main_entry_offset = None;
173         let mut name = None;
174         let mut segments = segment::Segments::new(ctx);
175         for i in 0..ncmds {
176             let cmd = load_command::LoadCommand::parse(bytes, offset, ctx.le)?;
177             debug!("{} - {:?}", i, cmd);
178             match cmd.command {
179                 load_command::CommandVariant::Segment32(command) => {
180                     // FIXME: we may want to be less strict about failure here, and just return an empty segment to allow parsing to continue?
181                     segments.push(segment::Segment::from_32(bytes, &command, cmd.offset, ctx)?)
182                 }
183                 load_command::CommandVariant::Segment64(command) => {
184                     segments.push(segment::Segment::from_64(bytes, &command, cmd.offset, ctx)?)
185                 }
186                 load_command::CommandVariant::Symtab(command) => {
187                     symbols = Some(symbols::Symbols::parse(bytes, &command, ctx)?);
188                 }
189                 load_command::CommandVariant::LoadDylib(command)
190                 | load_command::CommandVariant::LoadUpwardDylib(command)
191                 | load_command::CommandVariant::ReexportDylib(command)
192                 | load_command::CommandVariant::LoadWeakDylib(command)
193                 | load_command::CommandVariant::LazyLoadDylib(command) => {
194                     let lib = bytes.pread::<&str>(cmd.offset + command.dylib.name as usize)?;
195                     libs.push(lib);
196                 }
197                 load_command::CommandVariant::Rpath(command) => {
198                     let rpath = bytes.pread::<&str>(cmd.offset + command.path as usize)?;
199                     rpaths.push(rpath);
200                 }
201                 load_command::CommandVariant::DyldInfo(command)
202                 | load_command::CommandVariant::DyldInfoOnly(command) => {
203                     export_trie = Some(exports::ExportTrie::new(bytes, &command));
204                     bind_interpreter = Some(imports::BindInterpreter::new(bytes, &command));
205                 }
206                 load_command::CommandVariant::Unixthread(command) => {
207                     // dyld cares only about the first LC_UNIXTHREAD
208                     if unixthread_entry_address.is_none() {
209                         unixthread_entry_address =
210                             Some(command.instruction_pointer(header.cputype)?);
211                     }
212                 }
213                 load_command::CommandVariant::Main(command) => {
214                     // dyld cares only about the first LC_MAIN
215                     if main_entry_offset.is_none() {
216                         main_entry_offset = Some(command.entryoff);
217                     }
218                 }
219                 load_command::CommandVariant::IdDylib(command) => {
220                     let id = bytes.pread::<&str>(cmd.offset + command.dylib.name as usize)?;
221                     libs[0] = id;
222                     name = Some(id);
223                 }
224                 _ => (),
225             }
226             cmds.push(cmd)
227         }
228 
229         // dyld prefers LC_MAIN over LC_UNIXTHREAD
230         // choose the same way here
231         let (entry, old_style_entry) = if let Some(offset) = main_entry_offset {
232             // map the entrypoint offset to a virtual memory address
233             let base_address = segments
234                 .iter()
235                 .filter(|s| &s.segname[0..7] == b"__TEXT\0")
236                 .map(|s| s.vmaddr - s.fileoff)
237                 .next()
238                 .ok_or_else(|| {
239                     error::Error::Malformed(format!(
240                         "image specifies LC_MAIN offset {} but has no __TEXT segment",
241                         offset
242                     ))
243                 })?;
244 
245             (base_address + offset, false)
246         } else if let Some(address) = unixthread_entry_address {
247             (address, true)
248         } else {
249             (0, false)
250         };
251 
252         Ok(MachO {
253             header,
254             load_commands: cmds,
255             segments,
256             symbols,
257             libs,
258             rpaths,
259             export_trie,
260             bind_interpreter,
261             entry,
262             old_style_entry,
263             name,
264             ctx,
265             is_64,
266             little_endian,
267             data: bytes,
268         })
269     }
270 }
271 
/// A Mach-o multi architecture (Fat) binary container
pub struct MultiArch<'a> {
    /// The bytes of the whole fat container, starting at the fat header
    data: &'a [u8],
    /// Byte offset in `data` of the first fat arch header (right after the fat header)
    start: usize,
    /// The number of architectures, as declared by the fat header's `nfat_arch` field
    pub narches: usize,
}
278 
/// Iterator over the fat architecture headers in a `MultiArch` container
pub struct FatArchIterator<'a> {
    /// Index of the next fat arch header to yield
    index: usize,
    /// The bytes of the fat container the headers are read from
    data: &'a [u8],
    /// Total number of fat arch headers to yield
    narches: usize,
    /// Byte offset in `data` of the first fat arch header
    start: usize,
}
286 
287 impl<'a> Iterator for FatArchIterator<'a> {
288     type Item = error::Result<fat::FatArch>;
next(&mut self) -> Option<Self::Item>289     fn next(&mut self) -> Option<Self::Item> {
290         if self.index >= self.narches {
291             None
292         } else {
293             let offset = (self.index * fat::SIZEOF_FAT_ARCH) + self.start;
294             let arch = self
295                 .data
296                 .pread_with::<fat::FatArch>(offset, scroll::BE)
297                 .map_err(core::convert::Into::into);
298             self.index += 1;
299             Some(arch)
300         }
301     }
302 }
303 
/// Iterator over every `MachO` binary contained in this `MultiArch` container
pub struct MachOIterator<'a> {
    /// Index of the next architecture to parse
    index: usize,
    /// The bytes of the fat container the binaries are sliced from
    data: &'a [u8],
    /// Total number of architectures to visit
    narches: usize,
    /// Byte offset in `data` of the first fat arch header
    start: usize,
}
311 
312 impl<'a> Iterator for MachOIterator<'a> {
313     type Item = error::Result<MachO<'a>>;
next(&mut self) -> Option<Self::Item>314     fn next(&mut self) -> Option<Self::Item> {
315         if self.index >= self.narches {
316             None
317         } else {
318             let index = self.index;
319             let offset = (index * fat::SIZEOF_FAT_ARCH) + self.start;
320             self.index += 1;
321             match self.data.pread_with::<fat::FatArch>(offset, scroll::BE) {
322                 Ok(arch) => {
323                     let bytes = arch.slice(self.data);
324                     let binary = MachO::parse(bytes, 0);
325                     Some(binary)
326                 }
327                 Err(e) => Some(Err(e.into())),
328             }
329         }
330     }
331 }
332 
333 impl<'a, 'b> IntoIterator for &'b MultiArch<'a> {
334     type Item = error::Result<MachO<'a>>;
335     type IntoIter = MachOIterator<'a>;
into_iter(self) -> Self::IntoIter336     fn into_iter(self) -> Self::IntoIter {
337         MachOIterator {
338             index: 0,
339             data: self.data,
340             narches: self.narches,
341             start: self.start,
342         }
343     }
344 }
345 
346 impl<'a> MultiArch<'a> {
347     /// Lazily construct `Self`
new(bytes: &'a [u8]) -> error::Result<Self>348     pub fn new(bytes: &'a [u8]) -> error::Result<Self> {
349         let header = fat::FatHeader::parse(bytes)?;
350         Ok(MultiArch {
351             data: bytes,
352             start: fat::SIZEOF_FAT_HEADER,
353             narches: header.nfat_arch as usize,
354         })
355     }
356     /// Iterate every fat arch header
iter_arches(&self) -> FatArchIterator357     pub fn iter_arches(&self) -> FatArchIterator {
358         FatArchIterator {
359             index: 0,
360             data: self.data,
361             narches: self.narches,
362             start: self.start,
363         }
364     }
365     /// Return all the architectures in this binary
arches(&self) -> error::Result<Vec<fat::FatArch>>366     pub fn arches(&self) -> error::Result<Vec<fat::FatArch>> {
367         let mut arches = Vec::with_capacity(self.narches);
368         for arch in self.iter_arches() {
369             arches.push(arch?);
370         }
371         Ok(arches)
372     }
373     /// Try to get the Mach-o binary at `index`
get(&self, index: usize) -> error::Result<MachO<'a>>374     pub fn get(&self, index: usize) -> error::Result<MachO<'a>> {
375         if index >= self.narches {
376             return Err(error::Error::Malformed(format!(
377                 "Requested the {}-th binary, but there are only {} architectures in this container",
378                 index, self.narches
379             )));
380         }
381         let offset = (index * fat::SIZEOF_FAT_ARCH) + self.start;
382         let arch = self.data.pread_with::<fat::FatArch>(offset, scroll::BE)?;
383         let bytes = arch.slice(self.data);
384         Ok(MachO::parse(bytes, 0)?)
385     }
386 
find<F: Fn(error::Result<fat::FatArch>) -> bool>( &'a self, f: F, ) -> Option<error::Result<MachO<'a>>>387     pub fn find<F: Fn(error::Result<fat::FatArch>) -> bool>(
388         &'a self,
389         f: F,
390     ) -> Option<error::Result<MachO<'a>>> {
391         for (i, arch) in self.iter_arches().enumerate() {
392             if f(arch) {
393                 return Some(self.get(i));
394             }
395         }
396         None
397     }
398     /// Try and find the `cputype` in `Self`, if there is one
find_cputype(&self, cputype: u32) -> error::Result<Option<fat::FatArch>>399     pub fn find_cputype(&self, cputype: u32) -> error::Result<Option<fat::FatArch>> {
400         for arch in self.iter_arches() {
401             let arch = arch?;
402             if arch.cputype == cputype {
403                 return Ok(Some(arch));
404             }
405         }
406         Ok(None)
407     }
408 }
409 
410 impl<'a> fmt::Debug for MultiArch<'a> {
fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result411     fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
412         fmt.debug_struct("MultiArch")
413             .field("arches", &self.arches().unwrap_or_default())
414             .field("data", &self.data.len())
415             .finish()
416     }
417 }
418 
#[derive(Debug)]
// `Binary` carries a whole `MachO`, which has far more fields than the three-field
// `MultiArch` in `Fat`; the size difference is accepted rather than boxing the variant.
#[allow(clippy::large_enum_variant)]
/// Either a collection of multiple architectures, or a single mach-o binary.
///
/// This is what `Mach::parse` returns after inspecting the magic number of the input.
pub enum Mach<'a> {
    /// A "fat" multi-architecture binary container
    Fat(MultiArch<'a>),
    /// A regular Mach-o binary
    Binary(MachO<'a>),
}
428 
429 impl<'a> Mach<'a> {
430     /// Parse from `bytes` either a multi-arch binary or a regular mach-o binary
parse(bytes: &'a [u8]) -> error::Result<Self>431     pub fn parse(bytes: &'a [u8]) -> error::Result<Self> {
432         let size = bytes.len();
433         if size < 4 {
434             let error = error::Error::Malformed("size is smaller than a magical number".into());
435             return Err(error);
436         }
437         let magic = peek(&bytes, 0)?;
438         match magic {
439             fat::FAT_MAGIC => {
440                 let multi = MultiArch::new(bytes)?;
441                 Ok(Mach::Fat(multi))
442             }
443             // we might be a regular binary
444             _ => {
445                 let binary = MachO::parse(bytes, 0)?;
446                 Ok(Mach::Binary(binary))
447             }
448         }
449     }
450 }
451