1 //! Symbols exported by this binary and available for dynamic linking are encoded in mach-o binaries using a special trie
2 //!
//! **Note**: the trie is constructed lazily in case it won't be used. Computing the exports requires allocation, so to compute them you need to call the export trie's [exports()](struct.ExportTrie.html#method.exports) method.
4 
5 // TODO:
6 // (1) Weak of regular_symbol_info type probably needs to be added ?
7 // (3) /usr/lib/libstdc++.6.0.9.dylib has flag 0xc at many offsets... they're weak
8 
9 use core::ops::Range;
10 use scroll::{Pread, Uleb128};
11 use crate::error;
12 use core::fmt::{self, Debug};
13 use crate::mach::load_command;
14 use alloc::vec::Vec;
15 use alloc::string::String;
16 
/// Integer type used for the flags value of a terminal node
type Flag = u64;

// "The following are used on the flags byte of a terminal node
// in the export information."

/// Mask selecting the symbol-kind bits of the flags
pub const EXPORT_SYMBOL_FLAGS_KIND_MASK: Flag = 0x03;
/// Kind bits of a regular symbol
pub const EXPORT_SYMBOL_FLAGS_KIND_REGULAR: Flag = 0x00;
/// Kind bits of an absolute symbol.
///
/// This value is not present in `loader.h`; it is only defined (behind a macro) in the dyld
/// compressed image loader source, yet it does show up in practice (e.g. in libobjc).
pub const EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE: Flag = 0x02;
/// Kind bits of a thread-local symbol
pub const EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL: Flag = 0x01;
/// Flag bit marking a weak definition
pub const EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION: Flag = 0x04;
/// Flag bit marking a re-export from another library
pub const EXPORT_SYMBOL_FLAGS_REEXPORT: Flag = 0x08;
/// Flag bit marking an export that carries a stub offset and a resolver offset
pub const EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER: Flag = 0x10;
28 
29 #[derive(Debug)]
30 pub enum SymbolKind {
31     Regular,
32     Absolute,
33     ThreadLocal,
34     UnknownSymbolKind(Flag),
35 }
36 
37 impl SymbolKind {
new(kind: Flag) -> SymbolKind38     pub fn new(kind: Flag) -> SymbolKind {
39         match kind & EXPORT_SYMBOL_FLAGS_KIND_MASK {
40             0x00 => SymbolKind::Regular,
41             0x01 => SymbolKind::ThreadLocal,
42             0x02 => SymbolKind::Absolute,
43             _    => SymbolKind::UnknownSymbolKind(kind),
44         }
45     }
to_str(&self) -> &'static str46     pub fn to_str(&self) -> &'static str {
47         match self {
48             SymbolKind::Regular => "Regular",
49             SymbolKind::Absolute => "Absolute",
50             SymbolKind::ThreadLocal => "Thread_LOCAL",
51             SymbolKind::UnknownSymbolKind(_k) => "Unknown",
52         }
53     }
54 }
55 
56 #[derive(Debug)]
57 /// An export can be a regular export, a re-export, or a stub
58 pub enum ExportInfo<'a> {
59     /// A regular exported symbol, which is an address where it is found, and the flags associated with it
60     Regular {
61         address: u64,
62         flags: Flag,
63     },
64     /// if lib_symbol_name None then same symbol name, otherwise reexport of lib_symbol_name with name in the trie
65     /// "If the string is zero length, then the symbol is re-export from the specified dylib with the same name"
66     Reexport {
67         lib: &'a str,
68         lib_symbol_name: Option<&'a str>,
69         flags: Flag,
70     },
71     /// If the flags is `EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER`, then following the flags are two `Uleb128`s: the stub offset and the resolver offset. The stub is used by non-lazy pointers.  The resolver is used by lazy pointers and must be called to get the actual address to use
72     Stub {
73         stub_offset: scroll::Uleb128,
74         resolver_offset: scroll::Uleb128,
75         flags: Flag,
76     },
77 }
78 
79 impl<'a> ExportInfo<'a> {
80     /// Parse out the export info from `bytes`, at `offset`
parse(bytes: &'a [u8], libs: &[&'a str], flags: Flag, mut offset: usize) -> error::Result<ExportInfo<'a>>81     pub fn parse(bytes: &'a [u8], libs: &[&'a str], flags: Flag, mut offset: usize) -> error::Result<ExportInfo<'a>> {
82         use self::ExportInfo::*;
83         let regular = |offset| -> error::Result<ExportInfo> {
84             let address = bytes.pread::<Uleb128>(offset)?;
85             Ok(Regular {
86                 address: address.into(),
87                 flags
88             })
89         };
90         let reexport = |mut offset| -> error::Result<ExportInfo<'a>> {
91             let lib_ordinal: u64 = {
92                 let tmp = bytes.pread::<Uleb128>(offset)?;
93                 offset += tmp.size();
94                 tmp.into()
95             };
96             let lib_symbol_name = bytes.pread::<&str>(offset)?;
97             let lib = libs[lib_ordinal as usize];
98             let lib_symbol_name = if lib_symbol_name == "" { None } else { Some (lib_symbol_name)};
99             Ok(Reexport {
100                 lib,
101                 lib_symbol_name,
102                 flags
103             })
104         };
105         match SymbolKind::new(flags) {
106             SymbolKind::Regular => {
107                 if flags & EXPORT_SYMBOL_FLAGS_REEXPORT != 0 {
108                     reexport(offset)
109                 } else if flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER != 0 { // 0x10
110                     let stub_offset = bytes.pread::<Uleb128>(offset)?;
111                     offset += stub_offset.size();
112                     let resolver_offset = bytes.pread::<Uleb128>(offset)?;
113                     Ok(Stub {
114                         stub_offset,
115                         resolver_offset,
116                         flags
117                     })
118                     // else if (flags = kEXPORT_SYMBOL_FLAGS_WEAK_DEFINITION) then (*0x40 unused*)
119                 } else {
120                     regular(offset)
121                 }
122             },
123             SymbolKind::ThreadLocal | SymbolKind::Absolute => {
124                 if flags & EXPORT_SYMBOL_FLAGS_REEXPORT != 0 {
125                     reexport(offset)
126                 } else {
127                     regular(offset)
128                 }
129             },
130             SymbolKind::UnknownSymbolKind(_kind) => {
131                 // 0x5f causes errors, but parsing as regular symbol resolves...
132                 //Err(error::Error::Malformed(format!("Unknown kind {:#x} from flags {:#x} in get_symbol_type at offset {}", kind, flags, offset)))
133                 regular(offset)
134             }
135         }
136     }
137 }
138 
139 #[derive(Debug)]
140 /// A finalized symbolic export reconstructed from the export trie
141 pub struct Export<'a> {
142     /// The reconsituted export name which dyld matches against
143     pub name: String,
144     /// The export info in the node data
145     pub info: ExportInfo<'a>,
146     /// How large this export is
147     pub size: usize,
148     /// The offset this symbol export is found in the binary
149     pub offset: u64,
150 }
151 
152 impl<'a> Export<'a> {
153     /// Create a new export from `name` and `info`
new(name: String, info: ExportInfo<'a>) -> Export<'a>154     pub fn new(name: String, info: ExportInfo<'a>) -> Export<'a> {
155         let offset = match info {
156             ExportInfo::Regular { address, .. } => address,
157             _ => 0x0,
158         };
159         Export { name, info, size: 0, offset }
160     }
161 }
162 
/// An export trie efficiently encodes all of the symbols exported by this binary for dynamic linking
pub struct ExportTrie<'a> {
    /// The bytes that the trie's offsets index into
    data: &'a [u8],
    /// The byte range within `data` that the trie occupies
    location: Range<usize>,
}
168 
169 impl<'a> ExportTrie<'a> {
170 
171     #[inline]
walk_nodes(&self, libs: &[&'a str], branches: Vec<(String, usize)>, acc: &mut Vec<Export<'a>>) -> error::Result<()>172     fn walk_nodes(&self, libs: &[&'a str], branches: Vec<(String, usize)>, acc: &mut Vec<Export<'a>>) -> error::Result<()> {
173         for (symbol, next_node) in branches {
174             self.walk_trie(libs, symbol, next_node, acc)?;
175         }
176         Ok(())
177     }
178 
179     // current_symbol can be a str iiuc
walk_branches(&self, nbranches: usize, current_symbol: String, mut offset: usize) -> error::Result<Vec<(String, usize)>>180     fn walk_branches(&self, nbranches: usize, current_symbol: String, mut offset: usize) -> error::Result<Vec<(String, usize)>> {
181         let mut branches = Vec::with_capacity(nbranches);
182         //println!("\t@{:#x}", *offset);
183         for _i in 0..nbranches {
184             // additional offset calculations are relative to the base we received
185             let offset = &mut offset;
186             let string = self.data.pread::<&str>(*offset)?;
187             let mut key = current_symbol.clone();
188             key.push_str(string);
189             // +1 for null terminator
190             *offset = *offset + string.len() + 1;
191             //println!("\t({}) string_len: {} offset: {:#x}", i, string.len(), *offset);
192             // value is relative to export trie base
193             let next_node = Uleb128::read(&self.data, offset)? as usize + self.location.start;
194             //println!("\t({}) string: {} next_node: {:#x}", _i, key, next_node);
195             branches.push((key, next_node));
196         }
197         Ok(branches)
198     }
199 
walk_trie(&self, libs: &[&'a str], current_symbol: String, start: usize, exports: &mut Vec<Export<'a>>) -> error::Result<()>200     fn walk_trie(&self, libs: &[&'a str], current_symbol: String, start: usize, exports: &mut Vec<Export<'a>>) -> error::Result<()> {
201         if start < self.location.end {
202             let mut offset = start;
203             let terminal_size = Uleb128::read(&self.data, &mut offset)?;
204             // let mut input = String::new();
205             // ::std::io::stdin().read_line(&mut input).unwrap();
206             // println!("@ {:#x} node: {:#x} current_symbol: {}", start, terminal_size, current_symbol);
207             if terminal_size == 0 {
208                 let nbranches = Uleb128::read(&self.data, &mut offset)? as usize;
209                 //println!("\t@ {:#x} BRAN {}", *offset, nbranches);
210                 let branches = self.walk_branches(nbranches, current_symbol, offset)?;
211                 self.walk_nodes(libs, branches, exports)
212             } else { // terminal node, but the tricky part is that they can have children...
213                 let pos = offset;
214                 let children_start = &mut (pos + terminal_size as usize);
215                 let nchildren = Uleb128::read(&self.data, children_start)? as usize;
216                 let flags = Uleb128::read(&self.data, &mut offset)?;
217                 //println!("\t@ {:#x} TERM {} flags: {:#x}", offset, nchildren, flags);
218                 let info = ExportInfo::parse(&self.data, libs, flags, offset)?;
219                 let export = Export::new(current_symbol.clone(), info);
220                 //println!("\t{:?}", &export);
221                 exports.push(export);
222                 if nchildren == 0 {
223                     // this branch is done
224                     Ok(())
225                 } else {
226                     // more branches to walk
227                     let branches = self.walk_branches(nchildren, current_symbol, *children_start)?;
228                     self.walk_nodes(libs, branches, exports)
229                 }
230             }
231         } else { Ok(()) }
232     }
233 
234     /// Walk the export trie for symbols exported by this binary, using the provided `libs` to resolve re-exports
exports(&self, libs: &[&'a str]) -> error::Result<Vec<Export<'a>>>235     pub fn exports(&self, libs: &[&'a str]) -> error::Result<Vec<Export<'a>>> {
236         let offset = self.location.start;
237         let current_symbol = String::new();
238         let mut exports = Vec::new();
239         self.walk_trie(libs, current_symbol, offset, &mut exports)?;
240         Ok(exports)
241     }
242 
243     /// Create a new, lazy, zero-copy export trie from the `DyldInfo` `command`
new(bytes: &'a [u8], command: &load_command::DyldInfoCommand) -> Self244     pub fn new(bytes: &'a [u8], command: &load_command::DyldInfoCommand) -> Self {
245         let start = command.export_off as usize;
246         let end = (command.export_size + command.export_off) as usize;
247         ExportTrie {
248             data: bytes,
249             location: start..end,
250         }
251     }
252 }
253 
254 impl<'a> Debug for ExportTrie<'a> {
fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result255     fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
256         fmt.debug_struct("ExportTrie")
257             .field("data", &"<... redacted ...>")
258             .field("location", &format_args!("{:#x}..{:#x}", self.location.start, self.location.end))
259             .finish()
260     }
261 }
262 
#[cfg(test)]
mod tests {
    use super::*;

    /// Walks a small hand-assembled trie and checks that all three of its exports are found
    #[test]
    fn export_trie() {
        const EXPORTS: [u8; 64] = [
            0x00, 0x01, 0x5f, 0x00, 0x05, 0x00, 0x02, 0x5f, 0x6d, 0x68, 0x5f, 0x65, 0x78, 0x65, 0x63, 0x75,
            0x74, 0x65, 0x5f, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x00, 0x1f, 0x6d, 0x61, 0x00, 0x23, 0x02,
            0x00, 0x00, 0x00, 0x00, 0x02, 0x78, 0x69, 0x6d, 0x75, 0x6d, 0x00, 0x30, 0x69, 0x6e, 0x00, 0x35,
            0x03, 0x00, 0xc0, 0x1e, 0x00, 0x03, 0x00, 0xd0, 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        ];
        let trie_bytes = &EXPORTS[..];
        let libs = vec!["/usr/lib/libderp.so", "/usr/lib/libthuglife.so"];
        // the trie starts at offset 0 (the default) and spans the whole buffer
        let mut command = load_command::DyldInfoCommand::default();
        command.export_size = trie_bytes.len() as u32;
        let trie = ExportTrie::new(trie_bytes, &command);
        println!("trie: {:#?}", &trie);
        let found = trie.exports(&libs).unwrap();
        println!("len: {} exports: {:#?}", found.len(), &found);
        assert_eq!(found.len(), 3);
    }
}
280