1 //! The Mach-o, mostly zero-copy, binary format parser and raw struct definitions
2 use alloc::vec::Vec;
3 use core::fmt;
4
5 use log::debug;
6
7 use scroll::ctx::SizeWith;
8 use scroll::{Pread, BE};
9
10 use crate::container;
11 use crate::error;
12
13 pub mod bind_opcodes;
14 pub mod constants;
15 pub mod exports;
16 pub mod fat;
17 pub mod header;
18 pub mod imports;
19 pub mod load_command;
20 pub mod relocation;
21 pub mod segment;
22 pub mod symbols;
23
24 pub use self::constants::cputype;
25
26 /// Returns a big endian magical number
peek(bytes: &[u8], offset: usize) -> error::Result<u32>27 pub fn peek(bytes: &[u8], offset: usize) -> error::Result<u32> {
28 Ok(bytes.pread_with::<u32>(offset, scroll::BE)?)
29 }
30
31 /// Parses a magic number, and an accompanying mach-o binary parsing context, according to the magic number.
parse_magic_and_ctx( bytes: &[u8], offset: usize, ) -> error::Result<(u32, Option<container::Ctx>)>32 pub fn parse_magic_and_ctx(
33 bytes: &[u8],
34 offset: usize,
35 ) -> error::Result<(u32, Option<container::Ctx>)> {
36 use crate::container::Container;
37 use crate::mach::header::*;
38 let magic = bytes.pread_with::<u32>(offset, BE)?;
39 let ctx = match magic {
40 MH_CIGAM_64 | MH_CIGAM | MH_MAGIC_64 | MH_MAGIC => {
41 let is_lsb = magic == MH_CIGAM || magic == MH_CIGAM_64;
42 let le = scroll::Endian::from(is_lsb);
43 let container = if magic == MH_MAGIC_64 || magic == MH_CIGAM_64 {
44 Container::Big
45 } else {
46 Container::Little
47 };
48 Some(container::Ctx::new(container, le))
49 }
50 _ => None,
51 };
52 Ok((magic, ctx))
53 }
54
55 /// A cross-platform, zero-copy, endian-aware, 32/64 bit Mach-o binary parser
56 pub struct MachO<'a> {
57 /// The mach-o header
58 pub header: header::Header,
59 /// The load commands tell the kernel and dynamic linker how to use/interpret this binary
60 pub load_commands: Vec<load_command::LoadCommand>,
61 /// The load command "segments" - typically the pieces of the binary that are loaded into memory
62 pub segments: segment::Segments<'a>,
63 /// The "Nlist" style symbols in this binary - strippable
64 pub symbols: Option<symbols::Symbols<'a>>,
65 /// The dylibs this library depends on
66 pub libs: Vec<&'a str>,
67 /// The runtime search paths for dylibs this library depends on
68 pub rpaths: Vec<&'a str>,
69 /// The entry point (as a virtual memory address), 0 if none
70 pub entry: u64,
71 /// Whether `entry` refers to an older `LC_UNIXTHREAD` instead of the newer `LC_MAIN` entrypoint
72 pub old_style_entry: bool,
73 /// The name of the dylib, if any
74 pub name: Option<&'a str>,
75 /// Are we a little-endian binary?
76 pub little_endian: bool,
77 /// Are we a 64-bit binary
78 pub is_64: bool,
79 data: &'a [u8],
80 ctx: container::Ctx,
81 export_trie: Option<exports::ExportTrie<'a>>,
82 bind_interpreter: Option<imports::BindInterpreter<'a>>,
83 }
84
85 impl<'a> fmt::Debug for MachO<'a> {
fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result86 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
87 fmt.debug_struct("MachO")
88 .field("header", &self.header)
89 .field("load_commands", &self.load_commands)
90 .field("segments", &self.segments)
91 .field("entry", &self.entry)
92 .field("old_style_entry", &self.old_style_entry)
93 .field("libs", &self.libs)
94 .field("name", &self.name)
95 .field("little_endian", &self.little_endian)
96 .field("is_64", &self.is_64)
97 .field("symbols()", &self.symbols().collect::<Vec<_>>())
98 .field("exports()", &self.exports())
99 .field("imports()", &self.imports())
100 .finish()
101 }
102 }
103
104 impl<'a> MachO<'a> {
105 /// Is this a relocatable object file?
is_object_file(&self) -> bool106 pub fn is_object_file(&self) -> bool {
107 self.header.filetype == header::MH_OBJECT
108 }
109 /// Return an iterator over all the symbols in this binary
symbols(&self) -> symbols::SymbolIterator<'a>110 pub fn symbols(&self) -> symbols::SymbolIterator<'a> {
111 if let Some(ref symbols) = self.symbols {
112 symbols.into_iter()
113 } else {
114 symbols::SymbolIterator::default()
115 }
116 }
117 /// Return a vector of the relocations in this binary
relocations( &self, ) -> error::Result<Vec<(usize, segment::RelocationIterator, segment::Section)>>118 pub fn relocations(
119 &self,
120 ) -> error::Result<Vec<(usize, segment::RelocationIterator, segment::Section)>> {
121 debug!("Iterating relocations");
122 let mut relocs = Vec::new();
123 for (_i, segment) in (&self.segments).into_iter().enumerate() {
124 for (j, section) in segment.into_iter().enumerate() {
125 let (section, _data) = section?;
126 if section.nreloc > 0 {
127 relocs.push((j, section.iter_relocations(self.data, self.ctx), section));
128 }
129 }
130 }
131 Ok(relocs)
132 }
133 /// Return the exported symbols in this binary (if any)
exports(&self) -> error::Result<Vec<exports::Export>>134 pub fn exports(&self) -> error::Result<Vec<exports::Export>> {
135 if let Some(ref trie) = self.export_trie {
136 trie.exports(self.libs.as_slice())
137 } else {
138 Ok(vec![])
139 }
140 }
141 /// Return the imported symbols in this binary that dyld knows about (if any)
imports(&self) -> error::Result<Vec<imports::Import>>142 pub fn imports(&self) -> error::Result<Vec<imports::Import>> {
143 if let Some(ref interpreter) = self.bind_interpreter {
144 interpreter.imports(self.libs.as_slice(), self.segments.as_slice(), self.ctx)
145 } else {
146 Ok(vec![])
147 }
148 }
149 /// Parses the Mach-o binary from `bytes` at `offset`
parse(bytes: &'a [u8], mut offset: usize) -> error::Result<MachO<'a>>150 pub fn parse(bytes: &'a [u8], mut offset: usize) -> error::Result<MachO<'a>> {
151 let (magic, maybe_ctx) = parse_magic_and_ctx(bytes, offset)?;
152 let ctx = if let Some(ctx) = maybe_ctx {
153 ctx
154 } else {
155 return Err(error::Error::BadMagic(u64::from(magic)));
156 };
157 debug!("Ctx: {:?}", ctx);
158 let offset = &mut offset;
159 let header: header::Header = bytes.pread_with(*offset, ctx)?;
160 debug!("Mach-o header: {:?}", header);
161 let little_endian = ctx.le.is_little();
162 let is_64 = ctx.container.is_big();
163 *offset += header::Header::size_with(&ctx.container);
164 let ncmds = header.ncmds;
165 let mut cmds: Vec<load_command::LoadCommand> = Vec::with_capacity(ncmds);
166 let mut symbols = None;
167 let mut libs = vec!["self"];
168 let mut rpaths = vec![];
169 let mut export_trie = None;
170 let mut bind_interpreter = None;
171 let mut unixthread_entry_address = None;
172 let mut main_entry_offset = None;
173 let mut name = None;
174 let mut segments = segment::Segments::new(ctx);
175 for i in 0..ncmds {
176 let cmd = load_command::LoadCommand::parse(bytes, offset, ctx.le)?;
177 debug!("{} - {:?}", i, cmd);
178 match cmd.command {
179 load_command::CommandVariant::Segment32(command) => {
180 // FIXME: we may want to be less strict about failure here, and just return an empty segment to allow parsing to continue?
181 segments.push(segment::Segment::from_32(bytes, &command, cmd.offset, ctx)?)
182 }
183 load_command::CommandVariant::Segment64(command) => {
184 segments.push(segment::Segment::from_64(bytes, &command, cmd.offset, ctx)?)
185 }
186 load_command::CommandVariant::Symtab(command) => {
187 symbols = Some(symbols::Symbols::parse(bytes, &command, ctx)?);
188 }
189 load_command::CommandVariant::LoadDylib(command)
190 | load_command::CommandVariant::LoadUpwardDylib(command)
191 | load_command::CommandVariant::ReexportDylib(command)
192 | load_command::CommandVariant::LoadWeakDylib(command)
193 | load_command::CommandVariant::LazyLoadDylib(command) => {
194 let lib = bytes.pread::<&str>(cmd.offset + command.dylib.name as usize)?;
195 libs.push(lib);
196 }
197 load_command::CommandVariant::Rpath(command) => {
198 let rpath = bytes.pread::<&str>(cmd.offset + command.path as usize)?;
199 rpaths.push(rpath);
200 }
201 load_command::CommandVariant::DyldInfo(command)
202 | load_command::CommandVariant::DyldInfoOnly(command) => {
203 export_trie = Some(exports::ExportTrie::new(bytes, &command));
204 bind_interpreter = Some(imports::BindInterpreter::new(bytes, &command));
205 }
206 load_command::CommandVariant::Unixthread(command) => {
207 // dyld cares only about the first LC_UNIXTHREAD
208 if unixthread_entry_address.is_none() {
209 unixthread_entry_address =
210 Some(command.instruction_pointer(header.cputype)?);
211 }
212 }
213 load_command::CommandVariant::Main(command) => {
214 // dyld cares only about the first LC_MAIN
215 if main_entry_offset.is_none() {
216 main_entry_offset = Some(command.entryoff);
217 }
218 }
219 load_command::CommandVariant::IdDylib(command) => {
220 let id = bytes.pread::<&str>(cmd.offset + command.dylib.name as usize)?;
221 libs[0] = id;
222 name = Some(id);
223 }
224 _ => (),
225 }
226 cmds.push(cmd)
227 }
228
229 // dyld prefers LC_MAIN over LC_UNIXTHREAD
230 // choose the same way here
231 let (entry, old_style_entry) = if let Some(offset) = main_entry_offset {
232 // map the entrypoint offset to a virtual memory address
233 let base_address = segments
234 .iter()
235 .filter(|s| &s.segname[0..7] == b"__TEXT\0")
236 .map(|s| s.vmaddr - s.fileoff)
237 .next()
238 .ok_or_else(|| {
239 error::Error::Malformed(format!(
240 "image specifies LC_MAIN offset {} but has no __TEXT segment",
241 offset
242 ))
243 })?;
244
245 (base_address + offset, false)
246 } else if let Some(address) = unixthread_entry_address {
247 (address, true)
248 } else {
249 (0, false)
250 };
251
252 Ok(MachO {
253 header,
254 load_commands: cmds,
255 segments,
256 symbols,
257 libs,
258 rpaths,
259 export_trie,
260 bind_interpreter,
261 entry,
262 old_style_entry,
263 name,
264 ctx,
265 is_64,
266 little_endian,
267 data: bytes,
268 })
269 }
270 }
271
/// A Mach-o multi architecture (Fat) binary container.
///
/// Nothing beyond the fat header is parsed up front; the per-architecture headers and
/// binaries are read on demand from the borrowed bytes.
pub struct MultiArch<'a> {
    /// The bytes of the whole fat file
    data: &'a [u8],
    /// Byte offset at which the array of fat arch headers begins
    start: usize,
    /// The number of architectures the fat header declares (`nfat_arch`)
    pub narches: usize,
}
278
/// Iterator over the fat architecture headers in a `MultiArch` container
pub struct FatArchIterator<'a> {
    /// Index of the next arch header to read
    index: usize,
    /// The bytes of the whole fat file
    data: &'a [u8],
    /// Total number of arch headers to yield
    narches: usize,
    /// Byte offset of the first arch header within `data`
    start: usize,
}
286
287 impl<'a> Iterator for FatArchIterator<'a> {
288 type Item = error::Result<fat::FatArch>;
next(&mut self) -> Option<Self::Item>289 fn next(&mut self) -> Option<Self::Item> {
290 if self.index >= self.narches {
291 None
292 } else {
293 let offset = (self.index * fat::SIZEOF_FAT_ARCH) + self.start;
294 let arch = self
295 .data
296 .pread_with::<fat::FatArch>(offset, scroll::BE)
297 .map_err(core::convert::Into::into);
298 self.index += 1;
299 Some(arch)
300 }
301 }
302 }
303
/// Iterator over every `MachO` binary contained in this `MultiArch` container
pub struct MachOIterator<'a> {
    /// Index of the next architecture to parse
    index: usize,
    /// The bytes of the whole fat file
    data: &'a [u8],
    /// Total number of architectures to yield
    narches: usize,
    /// Byte offset of the first arch header within `data`
    start: usize,
}
311
312 impl<'a> Iterator for MachOIterator<'a> {
313 type Item = error::Result<MachO<'a>>;
next(&mut self) -> Option<Self::Item>314 fn next(&mut self) -> Option<Self::Item> {
315 if self.index >= self.narches {
316 None
317 } else {
318 let index = self.index;
319 let offset = (index * fat::SIZEOF_FAT_ARCH) + self.start;
320 self.index += 1;
321 match self.data.pread_with::<fat::FatArch>(offset, scroll::BE) {
322 Ok(arch) => {
323 let bytes = arch.slice(self.data);
324 let binary = MachO::parse(bytes, 0);
325 Some(binary)
326 }
327 Err(e) => Some(Err(e.into())),
328 }
329 }
330 }
331 }
332
333 impl<'a, 'b> IntoIterator for &'b MultiArch<'a> {
334 type Item = error::Result<MachO<'a>>;
335 type IntoIter = MachOIterator<'a>;
into_iter(self) -> Self::IntoIter336 fn into_iter(self) -> Self::IntoIter {
337 MachOIterator {
338 index: 0,
339 data: self.data,
340 narches: self.narches,
341 start: self.start,
342 }
343 }
344 }
345
346 impl<'a> MultiArch<'a> {
347 /// Lazily construct `Self`
new(bytes: &'a [u8]) -> error::Result<Self>348 pub fn new(bytes: &'a [u8]) -> error::Result<Self> {
349 let header = fat::FatHeader::parse(bytes)?;
350 Ok(MultiArch {
351 data: bytes,
352 start: fat::SIZEOF_FAT_HEADER,
353 narches: header.nfat_arch as usize,
354 })
355 }
356 /// Iterate every fat arch header
iter_arches(&self) -> FatArchIterator357 pub fn iter_arches(&self) -> FatArchIterator {
358 FatArchIterator {
359 index: 0,
360 data: self.data,
361 narches: self.narches,
362 start: self.start,
363 }
364 }
365 /// Return all the architectures in this binary
arches(&self) -> error::Result<Vec<fat::FatArch>>366 pub fn arches(&self) -> error::Result<Vec<fat::FatArch>> {
367 let mut arches = Vec::with_capacity(self.narches);
368 for arch in self.iter_arches() {
369 arches.push(arch?);
370 }
371 Ok(arches)
372 }
373 /// Try to get the Mach-o binary at `index`
get(&self, index: usize) -> error::Result<MachO<'a>>374 pub fn get(&self, index: usize) -> error::Result<MachO<'a>> {
375 if index >= self.narches {
376 return Err(error::Error::Malformed(format!(
377 "Requested the {}-th binary, but there are only {} architectures in this container",
378 index, self.narches
379 )));
380 }
381 let offset = (index * fat::SIZEOF_FAT_ARCH) + self.start;
382 let arch = self.data.pread_with::<fat::FatArch>(offset, scroll::BE)?;
383 let bytes = arch.slice(self.data);
384 Ok(MachO::parse(bytes, 0)?)
385 }
386
find<F: Fn(error::Result<fat::FatArch>) -> bool>( &'a self, f: F, ) -> Option<error::Result<MachO<'a>>>387 pub fn find<F: Fn(error::Result<fat::FatArch>) -> bool>(
388 &'a self,
389 f: F,
390 ) -> Option<error::Result<MachO<'a>>> {
391 for (i, arch) in self.iter_arches().enumerate() {
392 if f(arch) {
393 return Some(self.get(i));
394 }
395 }
396 None
397 }
398 /// Try and find the `cputype` in `Self`, if there is one
find_cputype(&self, cputype: u32) -> error::Result<Option<fat::FatArch>>399 pub fn find_cputype(&self, cputype: u32) -> error::Result<Option<fat::FatArch>> {
400 for arch in self.iter_arches() {
401 let arch = arch?;
402 if arch.cputype == cputype {
403 return Ok(Some(arch));
404 }
405 }
406 Ok(None)
407 }
408 }
409
410 impl<'a> fmt::Debug for MultiArch<'a> {
fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result411 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
412 fmt.debug_struct("MultiArch")
413 .field("arches", &self.arches().unwrap_or_default())
414 .field("data", &self.data.len())
415 .finish()
416 }
417 }
418
419 #[derive(Debug)]
420 #[allow(clippy::large_enum_variant)]
421 /// Either a collection of multiple architectures, or a single mach-o binary
422 pub enum Mach<'a> {
423 /// A "fat" multi-architecture binary container
424 Fat(MultiArch<'a>),
425 /// A regular Mach-o binary
426 Binary(MachO<'a>),
427 }
428
429 impl<'a> Mach<'a> {
430 /// Parse from `bytes` either a multi-arch binary or a regular mach-o binary
parse(bytes: &'a [u8]) -> error::Result<Self>431 pub fn parse(bytes: &'a [u8]) -> error::Result<Self> {
432 let size = bytes.len();
433 if size < 4 {
434 let error = error::Error::Malformed("size is smaller than a magical number".into());
435 return Err(error);
436 }
437 let magic = peek(&bytes, 0)?;
438 match magic {
439 fat::FAT_MAGIC => {
440 let multi = MultiArch::new(bytes)?;
441 Ok(Mach::Fat(multi))
442 }
443 // we might be a regular binary
444 _ => {
445 let binary = MachO::parse(bytes, 0)?;
446 Ok(Mach::Binary(binary))
447 }
448 }
449 }
450 }
451