1 //! The Mach-o, mostly zero-copy, binary format parser and raw struct definitions
2 use core::fmt;
3 use crate::alloc::vec::Vec;
4
5 use log::debug;
6
7 use scroll::{Pread, BE};
8 use scroll::ctx::SizeWith;
9
10 use crate::error;
11 use crate::container;
12
13 pub mod header;
14 pub mod constants;
15 pub mod fat;
16 pub mod load_command;
17 pub mod symbols;
18 pub mod exports;
19 pub mod imports;
20 pub mod bind_opcodes;
21 pub mod relocation;
22 pub mod segment;
23
24 pub use self::constants::cputype as cputype;
25
26 /// Returns a big endian magical number
peek(bytes: &[u8], offset: usize) -> error::Result<u32>27 pub fn peek(bytes: &[u8], offset: usize) -> error::Result<u32> {
28 Ok(bytes.pread_with::<u32>(offset, scroll::BE)?)
29 }
30
31 /// Parses a magic number, and an accompanying mach-o binary parsing context, according to the magic number.
parse_magic_and_ctx(bytes: &[u8], offset: usize) -> error::Result<(u32, Option<container::Ctx>)>32 pub fn parse_magic_and_ctx(bytes: &[u8], offset: usize) -> error::Result<(u32, Option<container::Ctx>)> {
33 use crate::mach::header::*;
34 use crate::container::Container;
35 let magic = bytes.pread_with::<u32>(offset, BE)?;
36 let ctx = match magic {
37 MH_CIGAM_64 | MH_CIGAM | MH_MAGIC_64 | MH_MAGIC => {
38 let is_lsb = magic == MH_CIGAM || magic == MH_CIGAM_64;
39 let le = scroll::Endian::from(is_lsb);
40 let container = if magic == MH_MAGIC_64 || magic == MH_CIGAM_64 { Container::Big } else { Container::Little };
41 Some(container::Ctx::new(container, le))
42 },
43 _ => None,
44 };
45 Ok((magic, ctx))
46 }
47
48 /// A cross-platform, zero-copy, endian-aware, 32/64 bit Mach-o binary parser
49 pub struct MachO<'a> {
50 /// The mach-o header
51 pub header: header::Header,
52 /// The load commands tell the kernel and dynamic linker how to use/interpret this binary
53 pub load_commands: Vec<load_command::LoadCommand>,
54 /// The load command "segments" - typically the pieces of the binary that are loaded into memory
55 pub segments: segment::Segments<'a>,
56 /// The "Nlist" style symbols in this binary - strippable
57 pub symbols: Option<symbols::Symbols<'a>>,
58 /// The dylibs this library depends on
59 pub libs: Vec<&'a str>,
60 /// The entry point (as a virtual memory address), 0 if none
61 pub entry: u64,
62 /// Whether `entry` refers to an older `LC_UNIXTHREAD` instead of the newer `LC_MAIN` entrypoint
63 pub old_style_entry: bool,
64 /// The name of the dylib, if any
65 pub name: Option<&'a str>,
66 /// Are we a little-endian binary?
67 pub little_endian: bool,
68 /// Are we a 64-bit binary
69 pub is_64: bool,
70 data: &'a [u8],
71 ctx: container::Ctx,
72 export_trie: Option<exports::ExportTrie<'a>>,
73 bind_interpreter: Option<imports::BindInterpreter<'a>>,
74 }
75
76 impl<'a> fmt::Debug for MachO<'a> {
fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result77 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
78 fmt.debug_struct("MachO")
79 .field("header", &self.header)
80 .field("load_commands", &self.load_commands)
81 .field("segments", &self.segments)
82 .field("entry", &self.entry)
83 .field("old_style_entry", &self.old_style_entry)
84 .field("libs", &self.libs)
85 .field("name", &self.name)
86 .field("little_endian", &self.little_endian)
87 .field("is_64", &self.is_64)
88 .field("symbols()", &self.symbols().collect::<Vec<_>>())
89 .field("exports()", &self.exports())
90 .field("imports()", &self.imports())
91 .finish()
92 }
93 }
94
95 impl<'a> MachO<'a> {
96 /// Is this a relocatable object file?
is_object_file(&self) -> bool97 pub fn is_object_file(&self) -> bool {
98 self.header.filetype == header::MH_OBJECT
99 }
100 /// Return an iterator over all the symbols in this binary
symbols(&self) -> symbols::SymbolIterator<'a>101 pub fn symbols(&self) -> symbols::SymbolIterator<'a> {
102 if let Some(ref symbols) = self.symbols {
103 symbols.into_iter()
104 } else {
105 symbols::SymbolIterator::default()
106 }
107 }
108 /// Return a vector of the relocations in this binary
relocations(&self) -> error::Result<Vec<(usize, segment::RelocationIterator, segment::Section)>>109 pub fn relocations(&self) -> error::Result<Vec<(usize, segment::RelocationIterator, segment::Section)>> {
110 debug!("Iterating relocations");
111 let mut relocs = Vec::new();
112 for (_i, segment) in (&self.segments).into_iter().enumerate() {
113 for (j, section) in segment.into_iter().enumerate() {
114 let (section, _data) = section?;
115 if section.nreloc > 0 {
116 relocs.push((j, section.iter_relocations(self.data, self.ctx), section));
117 }
118 }
119 }
120 Ok(relocs)
121 }
122 /// Return the exported symbols in this binary (if any)
exports(&self) -> error::Result<Vec<exports::Export>>123 pub fn exports(&self) -> error::Result<Vec<exports::Export>> {
124 if let Some(ref trie) = self.export_trie {
125 trie.exports(self.libs.as_slice())
126 } else {
127 Ok(vec![])
128 }
129 }
130 /// Return the imported symbols in this binary that dyld knows about (if any)
imports(&self) -> error::Result<Vec<imports::Import>>131 pub fn imports(&self) -> error::Result<Vec<imports::Import>> {
132 if let Some(ref interpreter) = self.bind_interpreter {
133 interpreter.imports(self.libs.as_slice(), self.segments.as_slice(), self.ctx)
134 } else {
135 Ok(vec![])
136 }
137 }
138 /// Parses the Mach-o binary from `bytes` at `offset`
parse(bytes: &'a [u8], mut offset: usize) -> error::Result<MachO<'a>>139 pub fn parse(bytes: &'a [u8], mut offset: usize) -> error::Result<MachO<'a>> {
140 let (magic, maybe_ctx) = parse_magic_and_ctx(bytes, offset)?;
141 let ctx = if let Some(ctx) = maybe_ctx { ctx } else { return Err(error::Error::BadMagic(u64::from(magic))) };
142 debug!("Ctx: {:?}", ctx);
143 let offset = &mut offset;
144 let header: header::Header = bytes.pread_with(*offset, ctx)?;
145 debug!("Mach-o header: {:?}", header);
146 let little_endian = ctx.le.is_little();
147 let is_64 = ctx.container.is_big();
148 *offset += header::Header::size_with(&ctx.container);
149 let ncmds = header.ncmds;
150 let mut cmds: Vec<load_command::LoadCommand> = Vec::with_capacity(ncmds);
151 let mut symbols = None;
152 let mut libs = vec!["self"];
153 let mut export_trie = None;
154 let mut bind_interpreter = None;
155 let mut unixthread_entry_address = None;
156 let mut main_entry_offset = None;
157 let mut name = None;
158 let mut segments = segment::Segments::new(ctx);
159 for i in 0..ncmds {
160 let cmd = load_command::LoadCommand::parse(bytes, offset, ctx.le)?;
161 debug!("{} - {:?}", i, cmd);
162 match cmd.command {
163 load_command::CommandVariant::Segment32(command) => {
164 // FIXME: we may want to be less strict about failure here, and just return an empty segment to allow parsing to continue?
165 segments.push(segment::Segment::from_32(bytes, &command, cmd.offset, ctx)?)
166 },
167 load_command::CommandVariant::Segment64(command) => {
168 segments.push(segment::Segment::from_64(bytes, &command, cmd.offset, ctx)?)
169 },
170 load_command::CommandVariant::Symtab(command) => {
171 symbols = Some(symbols::Symbols::parse(bytes, &command, ctx)?);
172 },
173 load_command::CommandVariant::LoadDylib (command)
174 | load_command::CommandVariant::LoadUpwardDylib(command)
175 | load_command::CommandVariant::ReexportDylib (command)
176 | load_command::CommandVariant::LoadWeakDylib (command)
177 | load_command::CommandVariant::LazyLoadDylib (command) => {
178 let lib = bytes.pread::<&str>(cmd.offset + command.dylib.name as usize)?;
179 libs.push(lib);
180 },
181 load_command::CommandVariant::DyldInfo (command)
182 | load_command::CommandVariant::DyldInfoOnly(command) => {
183 export_trie = Some(exports::ExportTrie::new(bytes, &command));
184 bind_interpreter = Some(imports::BindInterpreter::new(bytes, &command));
185 },
186 load_command::CommandVariant::Unixthread(command) => {
187 // dyld cares only about the first LC_UNIXTHREAD
188 if unixthread_entry_address.is_none() {
189 unixthread_entry_address = Some(command.instruction_pointer(header.cputype)?);
190 }
191 },
192 load_command::CommandVariant::Main(command) => {
193 // dyld cares only about the first LC_MAIN
194 if main_entry_offset.is_none() {
195 main_entry_offset = Some(command.entryoff);
196 }
197 },
198 load_command::CommandVariant::IdDylib(command) => {
199 let id = bytes.pread::<&str>(cmd.offset + command.dylib.name as usize)?;
200 libs[0] = id;
201 name = Some(id);
202 },
203 _ => ()
204 }
205 cmds.push(cmd)
206 }
207
208 // dyld prefers LC_MAIN over LC_UNIXTHREAD
209 // choose the same way here
210 let (entry, old_style_entry) = if let Some(offset) = main_entry_offset {
211 // map the entrypoint offset to a virtual memory address
212 let base_address = segments.iter()
213 .filter(|s| &s.segname[0..7] == b"__TEXT\0")
214 .map(|s| s.vmaddr - s.fileoff)
215 .next()
216 .ok_or_else(||
217 error::Error::Malformed(format!("image specifies LC_MAIN offset {} but has no __TEXT segment", offset))
218 )?;
219
220 (base_address + offset, false)
221 } else if let Some(address) = unixthread_entry_address {
222 (address, true)
223 } else {
224 (0, false)
225 };
226
227 Ok(MachO {
228 header,
229 load_commands: cmds,
230 segments,
231 symbols,
232 libs,
233 export_trie,
234 bind_interpreter,
235 entry,
236 old_style_entry,
237 name,
238 ctx,
239 is_64,
240 little_endian,
241 data: bytes,
242 })
243 }
244 }
245
/// A Mach-o multi architecture (Fat) binary container
pub struct MultiArch<'a> {
    /// The bytes of the whole fat binary
    data: &'a [u8],
    /// The byte offset in `data` at which the first fat arch header is read
    start: usize,
    /// The number of architectures, as declared by the fat header
    pub narches: usize,
}
252
/// Iterator over the fat architecture headers in a `MultiArch` container
pub struct FatArchIterator<'a> {
    /// The index of the next fat arch header to read
    index: usize,
    /// The bytes the headers are read from
    data: &'a [u8],
    /// The number of headers to yield
    narches: usize,
    /// The byte offset in `data` of the header at index 0
    start: usize,
}
260
261 impl<'a> Iterator for FatArchIterator<'a> {
262 type Item = error::Result<fat::FatArch>;
next(&mut self) -> Option<Self::Item>263 fn next(&mut self) -> Option<Self::Item> {
264 if self.index >= self.narches {
265 None
266 } else {
267 let offset = (self.index * fat::SIZEOF_FAT_ARCH) + self.start;
268 let arch = self.data.pread_with::<fat::FatArch>(offset, scroll::BE).map_err(core::convert::Into::into);
269 self.index += 1;
270 Some(arch)
271 }
272 }
273 }
274
/// Iterator over every `MachO` binary contained in this `MultiArch` container
pub struct MachOIterator<'a> {
    /// The index of the next architecture to parse
    index: usize,
    /// The bytes of the whole fat binary
    data: &'a [u8],
    /// The number of binaries to yield
    narches: usize,
    /// The byte offset in `data` of the fat arch header at index 0
    start: usize,
}
282
283 impl<'a> Iterator for MachOIterator<'a> {
284 type Item = error::Result<MachO<'a>>;
next(&mut self) -> Option<Self::Item>285 fn next(&mut self) -> Option<Self::Item> {
286 if self.index >= self.narches {
287 None
288 } else {
289 let index = self.index;
290 let offset = (index * fat::SIZEOF_FAT_ARCH) + self.start;
291 self.index += 1;
292 match self.data.pread_with::<fat::FatArch>(offset, scroll::BE) {
293 Ok(arch) => {
294 let bytes = arch.slice(self.data);
295 let binary = MachO::parse(bytes, 0);
296 Some(binary)
297 },
298 Err(e) => Some(Err(e.into()))
299 }
300 }
301 }
302 }
303
304 impl<'a, 'b> IntoIterator for &'b MultiArch<'a> {
305 type Item = error::Result<MachO<'a>>;
306 type IntoIter = MachOIterator<'a>;
into_iter(self) -> Self::IntoIter307 fn into_iter(self) -> Self::IntoIter {
308 MachOIterator {
309 index: 0,
310 data: self.data,
311 narches: self.narches,
312 start: self.start,
313 }
314 }
315 }
316
317 impl<'a> MultiArch<'a> {
318 /// Lazily construct `Self`
new(bytes: &'a [u8]) -> error::Result<Self>319 pub fn new(bytes: &'a [u8]) -> error::Result<Self> {
320 let header = fat::FatHeader::parse(bytes)?;
321 Ok(MultiArch {
322 data: bytes,
323 start: fat::SIZEOF_FAT_HEADER,
324 narches: header.nfat_arch as usize
325 })
326 }
327 /// Iterate every fat arch header
iter_arches(&self) -> FatArchIterator328 pub fn iter_arches(&self) -> FatArchIterator {
329 FatArchIterator {
330 index: 0,
331 data: self.data,
332 narches: self.narches,
333 start: self.start,
334 }
335 }
336 /// Return all the architectures in this binary
arches(&self) -> error::Result<Vec<fat::FatArch>>337 pub fn arches(&self) -> error::Result<Vec<fat::FatArch>> {
338 let mut arches = Vec::with_capacity(self.narches);
339 for arch in self.iter_arches() {
340 arches.push(arch?);
341 }
342 Ok(arches)
343 }
344 /// Try to get the Mach-o binary at `index`
get(&self, index: usize) -> error::Result<MachO<'a>>345 pub fn get(&self, index: usize) -> error::Result<MachO<'a>> {
346 if index >= self.narches {
347 return Err(error::Error::Malformed(format!("Requested the {}-th binary, but there are only {} architectures in this container", index, self.narches)))
348 }
349 let offset = (index * fat::SIZEOF_FAT_ARCH) + self.start;
350 let arch = self.data.pread_with::<fat::FatArch>(offset, scroll::BE)?;
351 let bytes = arch.slice(self.data);
352 Ok(MachO::parse(bytes, 0)?)
353 }
354
find<F: Fn(error::Result<fat::FatArch>) -> bool>(&'a self, f: F) -> Option<error::Result<MachO<'a>>>355 pub fn find<F: Fn(error::Result<fat::FatArch>) -> bool>(&'a self, f: F) -> Option<error::Result<MachO<'a>>> {
356 for (i, arch) in self.iter_arches().enumerate() {
357 if f(arch) {
358 return Some(self.get(i));
359 }
360 }
361 None
362 }
363 /// Try and find the `cputype` in `Self`, if there is one
find_cputype(&self, cputype: u32) -> error::Result<Option<fat::FatArch>>364 pub fn find_cputype(&self, cputype: u32) -> error::Result<Option<fat::FatArch>> {
365 for arch in self.iter_arches() {
366 let arch = arch?;
367 if arch.cputype == cputype { return Ok(Some(arch)) }
368 }
369 Ok(None)
370 }
371 }
372
373 impl<'a> fmt::Debug for MultiArch<'a> {
fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result374 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
375 fmt.debug_struct("MultiArch")
376 .field("arches", &self.arches().unwrap())
377 .field("data", &self.data.len())
378 .finish()
379 }
380 }
381
382 #[derive(Debug)]
383 #[allow(clippy::large_enum_variant)]
384 /// Either a collection of multiple architectures, or a single mach-o binary
385 pub enum Mach<'a> {
386 /// A "fat" multi-architecture binary container
387 Fat(MultiArch<'a>),
388 /// A regular Mach-o binary
389 Binary(MachO<'a>)
390 }
391
392 impl<'a> Mach<'a> {
393 /// Parse from `bytes` either a multi-arch binary or a regular mach-o binary
parse(bytes: &'a [u8]) -> error::Result<Self>394 pub fn parse(bytes: &'a [u8]) -> error::Result<Self> {
395 let size = bytes.len();
396 if size < 4 {
397 let error = error::Error::Malformed("size is smaller than a magical number".into());
398 return Err(error);
399 }
400 let magic = peek(&bytes, 0)?;
401 match magic {
402 fat::FAT_MAGIC => {
403 let multi = MultiArch::new(bytes)?;
404 Ok(Mach::Fat(multi))
405 },
406 // we might be a regular binary
407 _ => {
408 let binary = MachO::parse(bytes, 0)?;
409 Ok(Mach::Binary(binary))
410 }
411 }
412 }
413 }
414