1 use super::{Box, Context, Mapping, Path, Stash, Vec};
2 use core::convert::TryInto;
3 use object::macho;
4 use object::read::macho::{MachHeader, Nlist, Section, Segment as _};
5 use object::{Bytes, NativeEndian};
6 
// Mach-O header type for the compilation target: the 32-bit header layout on
// targets with 32-bit pointers and the 64-bit layout on targets with 64-bit
// pointers, in both cases parameterized with `NativeEndian`.
#[cfg(target_pointer_width = "32")]
type Mach = object::macho::MachHeader32<NativeEndian>;
#[cfg(target_pointer_width = "64")]
type Mach = object::macho::MachHeader64<NativeEndian>;
// Segment, section, and symbol-table entry (nlist) types that the
// `MachHeader` trait associates with the header type selected above.
type MachSegment = <Mach as MachHeader>::Segment;
type MachSection = <Mach as MachHeader>::Section;
type MachNlist = <Mach as MachHeader>::Nlist;
14 
15 impl Mapping {
16     // The loading path for OSX is is so different we just have a completely
17     // different implementation of the function here. On OSX we need to go
18     // probing the filesystem for a bunch of files.
new(path: &Path) -> Option<Mapping>19     pub fn new(path: &Path) -> Option<Mapping> {
20         // First up we need to load the unique UUID which is stored in the macho
21         // header of the file we're reading, specified at `path`.
22         let map = super::mmap(path)?;
23         let (macho, data) = find_header(&map)?;
24         let endian = macho.endian().ok()?;
25         let uuid = macho.uuid(endian, data, 0).ok()?;
26 
27         // Next we need to look for a `*.dSYM` file. For now we just probe the
28         // containing directory and look around for something that matches
29         // `*.dSYM`. Once it's found we root through the dwarf resources that it
30         // contains and try to find a macho file which has a matching UUID as
31         // the one of our own file. If we find a match that's the dwarf file we
32         // want to return.
33         if let Some(uuid) = uuid {
34             if let Some(parent) = path.parent() {
35                 if let Some(mapping) = Mapping::load_dsym(parent, uuid) {
36                     return Some(mapping);
37                 }
38             }
39         }
40 
41         // Looks like nothing matched our UUID, so let's at least return our own
42         // file. This should have the symbol table for at least some
43         // symbolication purposes.
44         Mapping::mk(map, |data, stash| {
45             let (macho, data) = find_header(data)?;
46             let endian = macho.endian().ok()?;
47             let obj = Object::parse(macho, endian, data)?;
48             Context::new(stash, obj, None)
49         })
50     }
51 
load_dsym(dir: &Path, uuid: [u8; 16]) -> Option<Mapping>52     fn load_dsym(dir: &Path, uuid: [u8; 16]) -> Option<Mapping> {
53         for entry in dir.read_dir().ok()? {
54             let entry = entry.ok()?;
55             let filename = match entry.file_name().into_string() {
56                 Ok(name) => name,
57                 Err(_) => continue,
58             };
59             if !filename.ends_with(".dSYM") {
60                 continue;
61             }
62             let candidates = entry.path().join("Contents/Resources/DWARF");
63             if let Some(mapping) = Mapping::try_dsym_candidate(&candidates, uuid) {
64                 return Some(mapping);
65             }
66         }
67         None
68     }
69 
try_dsym_candidate(dir: &Path, uuid: [u8; 16]) -> Option<Mapping>70     fn try_dsym_candidate(dir: &Path, uuid: [u8; 16]) -> Option<Mapping> {
71         // Look for files in the `DWARF` directory which have a matching uuid to
72         // the original object file. If we find one then we found the debug
73         // information.
74         for entry in dir.read_dir().ok()? {
75             let entry = entry.ok()?;
76             let map = super::mmap(&entry.path())?;
77             let candidate = Mapping::mk(map, |data, stash| {
78                 let (macho, data) = find_header(data)?;
79                 let endian = macho.endian().ok()?;
80                 let entry_uuid = macho.uuid(endian, data, 0).ok()??;
81                 if entry_uuid != uuid {
82                     return None;
83                 }
84                 let obj = Object::parse(macho, endian, data)?;
85                 Context::new(stash, obj, None)
86             });
87             if let Some(candidate) = candidate {
88                 return Some(candidate);
89             }
90         }
91 
92         None
93     }
94 }
95 
find_header(data: &'_ [u8]) -> Option<(&'_ Mach, &'_ [u8])>96 fn find_header(data: &'_ [u8]) -> Option<(&'_ Mach, &'_ [u8])> {
97     use object::endian::BigEndian;
98 
99     let desired_cpu = || {
100         if cfg!(target_arch = "x86") {
101             Some(macho::CPU_TYPE_X86)
102         } else if cfg!(target_arch = "x86_64") {
103             Some(macho::CPU_TYPE_X86_64)
104         } else if cfg!(target_arch = "arm") {
105             Some(macho::CPU_TYPE_ARM)
106         } else if cfg!(target_arch = "aarch64") {
107             Some(macho::CPU_TYPE_ARM64)
108         } else {
109             None
110         }
111     };
112 
113     let mut data = Bytes(data);
114     match data
115         .clone()
116         .read::<object::endian::U32<NativeEndian>>()
117         .ok()?
118         .get(NativeEndian)
119     {
120         macho::MH_MAGIC_64 | macho::MH_CIGAM_64 | macho::MH_MAGIC | macho::MH_CIGAM => {}
121 
122         macho::FAT_MAGIC | macho::FAT_CIGAM => {
123             let mut header_data = data;
124             let endian = BigEndian;
125             let header = header_data.read::<macho::FatHeader>().ok()?;
126             let nfat = header.nfat_arch.get(endian);
127             let arch = (0..nfat)
128                 .filter_map(|_| header_data.read::<macho::FatArch32>().ok())
129                 .find(|arch| desired_cpu() == Some(arch.cputype.get(endian)))?;
130             let offset = arch.offset.get(endian);
131             let size = arch.size.get(endian);
132             data = data
133                 .read_bytes_at(offset.try_into().ok()?, size.try_into().ok()?)
134                 .ok()?;
135         }
136 
137         macho::FAT_MAGIC_64 | macho::FAT_CIGAM_64 => {
138             let mut header_data = data;
139             let endian = BigEndian;
140             let header = header_data.read::<macho::FatHeader>().ok()?;
141             let nfat = header.nfat_arch.get(endian);
142             let arch = (0..nfat)
143                 .filter_map(|_| header_data.read::<macho::FatArch64>().ok())
144                 .find(|arch| desired_cpu() == Some(arch.cputype.get(endian)))?;
145             let offset = arch.offset.get(endian);
146             let size = arch.size.get(endian);
147             data = data
148                 .read_bytes_at(offset.try_into().ok()?, size.try_into().ok()?)
149                 .ok()?;
150         }
151 
152         _ => return None,
153     }
154 
155     Mach::parse(data.0, 0).ok().map(|h| (h, data.0))
156 }
157 
158 // This is used both for executables/libraries and source object files.
159 pub struct Object<'a> {
160     endian: NativeEndian,
161     data: &'a [u8],
162     dwarf: Option<&'a [MachSection]>,
163     syms: Vec<(&'a [u8], u64)>,
164     syms_sort_by_name: bool,
165     // Only set for executables/libraries, and not the source object files.
166     object_map: Option<object::ObjectMap<'a>>,
167     // The outer Option is for lazy loading, and the inner Option allows load errors to be cached.
168     object_mappings: Box<[Option<Option<Mapping>>]>,
169 }
170 
171 impl<'a> Object<'a> {
parse(mach: &'a Mach, endian: NativeEndian, data: &'a [u8]) -> Option<Object<'a>>172     fn parse(mach: &'a Mach, endian: NativeEndian, data: &'a [u8]) -> Option<Object<'a>> {
173         let is_object = mach.filetype(endian) == object::macho::MH_OBJECT;
174         let mut dwarf = None;
175         let mut syms = Vec::new();
176         let mut syms_sort_by_name = false;
177         let mut commands = mach.load_commands(endian, data, 0).ok()?;
178         let mut object_map = None;
179         let mut object_mappings = Vec::new();
180         while let Ok(Some(command)) = commands.next() {
181             if let Some((segment, section_data)) = MachSegment::from_command(command).ok()? {
182                 // Object files should have all sections in a single unnamed segment load command.
183                 if segment.name() == b"__DWARF" || (is_object && segment.name() == b"") {
184                     dwarf = segment.sections(endian, section_data).ok();
185                 }
186             } else if let Some(symtab) = command.symtab().ok()? {
187                 let symbols = symtab.symbols::<Mach, _>(endian, data).ok()?;
188                 syms = symbols
189                     .iter()
190                     .filter_map(|nlist: &MachNlist| {
191                         let name = nlist.name(endian, symbols.strings()).ok()?;
192                         if name.len() > 0 && nlist.is_definition() {
193                             Some((name, u64::from(nlist.n_value(endian))))
194                         } else {
195                             None
196                         }
197                     })
198                     .collect();
199                 if is_object {
200                     // We never search object file symbols by address.
201                     // Instead, we already know the symbol name from the executable, and we
202                     // need to search by name to find the matching symbol in the object file.
203                     syms.sort_unstable_by_key(|(name, _)| *name);
204                     syms_sort_by_name = true;
205                 } else {
206                     syms.sort_unstable_by_key(|(_, addr)| *addr);
207                     let map = symbols.object_map(endian);
208                     object_mappings.resize_with(map.objects().len(), || None);
209                     object_map = Some(map);
210                 }
211             }
212         }
213 
214         Some(Object {
215             endian,
216             data,
217             dwarf,
218             syms,
219             syms_sort_by_name,
220             object_map,
221             object_mappings: object_mappings.into_boxed_slice(),
222         })
223     }
224 
section(&self, _: &Stash, name: &str) -> Option<&'a [u8]>225     pub fn section(&self, _: &Stash, name: &str) -> Option<&'a [u8]> {
226         let name = name.as_bytes();
227         let dwarf = self.dwarf?;
228         let section = dwarf.into_iter().find(|section| {
229             let section_name = section.name();
230             section_name == name || {
231                 section_name.starts_with(b"__")
232                     && name.starts_with(b".")
233                     && &section_name[2..] == &name[1..]
234             }
235         })?;
236         Some(section.data(self.endian, self.data).ok()?)
237     }
238 
search_symtab<'b>(&'b self, addr: u64) -> Option<&'b [u8]>239     pub fn search_symtab<'b>(&'b self, addr: u64) -> Option<&'b [u8]> {
240         debug_assert!(!self.syms_sort_by_name);
241         let i = match self.syms.binary_search_by_key(&addr, |(_, addr)| *addr) {
242             Ok(i) => i,
243             Err(i) => i.checked_sub(1)?,
244         };
245         let (sym, _addr) = self.syms.get(i)?;
246         Some(sym)
247     }
248 
249     /// Try to load a context for an object file.
250     ///
251     /// If dsymutil was not run, then the DWARF may be found in the source object files.
search_object_map<'b>(&'b mut self, addr: u64) -> Option<(&Context<'b>, u64)>252     pub(super) fn search_object_map<'b>(&'b mut self, addr: u64) -> Option<(&Context<'b>, u64)> {
253         // `object_map` contains a map from addresses to symbols and object paths.
254         // Look up the address and get a mapping for the object.
255         let object_map = self.object_map.as_ref()?;
256         let symbol = object_map.get(addr)?;
257         let object_index = symbol.object_index();
258         let mapping = self.object_mappings.get_mut(object_index)?;
259         if mapping.is_none() {
260             // No cached mapping, so create it.
261             *mapping = Some(object_mapping(object_map.objects().get(object_index)?));
262         }
263         let cx: &'b Context<'static> = &mapping.as_ref()?.as_ref()?.cx;
264         // Don't leak the `'static` lifetime, make sure it's scoped to just ourselves.
265         let cx = unsafe { core::mem::transmute::<&'b Context<'static>, &'b Context<'b>>(cx) };
266 
267         // We must translate the address in order to be able to look it up
268         // in the DWARF in the object file.
269         debug_assert!(cx.object.syms.is_empty() || cx.object.syms_sort_by_name);
270         let i = cx
271             .object
272             .syms
273             .binary_search_by_key(&symbol.name(), |(name, _)| *name)
274             .ok()?;
275         let object_symbol = cx.object.syms.get(i)?;
276         let object_addr = addr
277             .wrapping_sub(symbol.address())
278             .wrapping_add(object_symbol.1);
279         Some((cx, object_addr))
280     }
281 }
282 
object_mapping(path: &[u8]) -> Option<Mapping>283 fn object_mapping(path: &[u8]) -> Option<Mapping> {
284     use super::mystd::ffi::OsStr;
285     use super::mystd::os::unix::prelude::*;
286 
287     let map;
288 
289     // `N_OSO` symbol names can be either `/path/to/object.o` or `/path/to/archive.a(object.o)`.
290     let member_name = if let Some((archive_path, member_name)) = split_archive_path(path) {
291         map = super::mmap(Path::new(OsStr::from_bytes(archive_path)))?;
292         Some(member_name)
293     } else {
294         map = super::mmap(Path::new(OsStr::from_bytes(path)))?;
295         None
296     };
297     Mapping::mk(map, |data, stash| {
298         let data = match member_name {
299             Some(member_name) => {
300                 let archive = object::read::archive::ArchiveFile::parse(data).ok()?;
301                 let member = archive
302                     .members()
303                     .filter_map(Result::ok)
304                     .find(|m| m.name() == member_name)?;
305                 member.data(data).ok()?
306             }
307             None => data,
308         };
309         let (macho, data) = find_header(data)?;
310         let endian = macho.endian().ok()?;
311         let obj = Object::parse(macho, endian, data)?;
312         Context::new(stash, obj, None)
313     })
314 }
315 
/// Splits an `archive(member)` style path into `(archive, member)`.
///
/// Returns `None` when `path` is empty, does not end in `)`, or contains no
/// `(` before that final `)`. The split happens at the *last* `(`, so
/// parentheses in directory names (e.g. `/build (copy)/lib.a(obj.o)`) stay
/// part of the archive path.
fn split_archive_path(path: &[u8]) -> Option<(&[u8], &[u8])> {
    let (last, path) = path.split_last()?;
    if *last != b')' {
        return None;
    }
    // Search from the end: the member name is the final parenthesized group,
    // whereas earlier parentheses belong to the archive's own path.
    let index = path.iter().rposition(|&x| x == b'(')?;
    let (archive, rest) = path.split_at(index);
    // `rest` starts with the `(` that was just found; drop it.
    Some((archive, &rest[1..]))
}
325