//! Support for symbolication using the `gimli` crate on crates.io
//!
//! This implementation is largely a work in progress and is off by default for
//! all platforms, but it's hoped to be developed over time! Long-term this is
//! intended to wholesale replace the `libbacktrace.rs` implementation.

use self::gimli::read::EndianSlice;
use self::gimli::LittleEndian as Endian;
use self::mmap::Mmap;
use self::stash::Stash;
use crate::symbolize::ResolveWhat;
use crate::types::BytesOrWideString;
use crate::SymbolName;
use addr2line::gimli;
use core::convert::TryInto;
use core::mem;
use core::u32;
use libc::c_void;
use std::ffi::OsString;
use std::fs::File;
use std::path::Path;
use std::prelude::v1::*;

#[cfg(windows)]
#[path = "gimli/mmap_windows.rs"]
mod mmap;
#[cfg(unix)]
#[path = "gimli/mmap_unix.rs"]
mod mmap;
mod stash;

const MAPPINGS_CACHE_SIZE: usize = 4;

struct Context<'a> {
    dwarf: addr2line::Context<EndianSlice<'a, Endian>>,
    object: Object<'a>,
}

struct Mapping {
    // 'static lifetime is a lie to hack around lack of support for self-referential structs.
    cx: Context<'static>,
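    // These are never read; they're held so the data borrowed by `cx` above
    // stays alive for as long as this `Mapping` does.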
    _map: Mmap,
    _stash: Stash,
}

fn cx<'data>(stash: &'data Stash, object: Object<'data>) -> Option<Context<'data>> {
    fn load_section<'data, S>(stash: &'data Stash, obj: &Object<'data>) -> S
    where
        S: gimli::Section<gimli::EndianSlice<'data, Endian>>,
    {
        let data = obj.section(stash, S::section_name()).unwrap_or(&[]);
        S::from(EndianSlice::new(data, Endian))
    }

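    // Each `load_section` call below is monomorphized by inference to the
    // gimli section type of the corresponding `from_sections` parameter
    // (`.debug_abbrev`, `.debug_info`, `.debug_line`, and so on); the final
    // empty slice is a placeholder for section data not loaded here.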
    let dwarf = addr2line::Context::from_sections(
        load_section(stash, &object),
        load_section(stash, &object),
        load_section(stash, &object),
        load_section(stash, &object),
        load_section(stash, &object),
        load_section(stash, &object),
        load_section(stash, &object),
        load_section(stash, &object),
        load_section(stash, &object),
        gimli::EndianSlice::new(&[], Endian),
    )
    .ok()?;
    Some(Context { dwarf, object })
}

macro_rules! mk {
    (Mapping { $map:expr, $inner:expr, $stash:expr }) => {{
        use crate::symbolize::gimli::{Context, Mapping, Mmap};

        fn assert_lifetimes<'a>(_: &'a Mmap, _: &Context<'a>, _: &'a Stash) {}
        assert_lifetimes(&$map, &$inner, &$stash);
        Mapping {
            // Convert to 'static lifetimes since the symbols should
            // only borrow `map` and `stash` and we're preserving them below.
            cx: unsafe { core::mem::transmute::<Context<'_>, Context<'static>>($inner) },
            _map: $map,
            _stash: $stash,
        }
    }};
}
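
// A sketch of how a `Mapping` constructor is expected to use `mk!`, assuming
// the per-format modules (`elf.rs`, `macho.rs`, `coff.rs`) expose an
// `Object::parse`-style constructor (the real constructors live in those
// modules, not here):
//
//     let map = mmap(path)?;
//     let stash = Stash::new();
//     let cx = cx(&stash, Object::parse(&map)?)?;
//     Some(mk!(Mapping { map, cx, stash }))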

fn mmap(path: &Path) -> Option<Mmap> {
    let file = File::open(path).ok()?;
    let len = file.metadata().ok()?.len().try_into().ok()?;
    unsafe { Mmap::map(&file, len) }
}

cfg_if::cfg_if! {
    if #[cfg(windows)] {
        use core::mem::MaybeUninit;
        use crate::windows::*;
        use std::os::windows::prelude::*;

        mod coff;
        use self::coff::Object;

        // For loading native libraries on Windows, see some discussion on
        // rust-lang/rust#71060 for the various strategies here.
        fn native_libraries() -> Vec<Library> {
            let mut ret = Vec::new();
            unsafe { add_loaded_images(&mut ret); }
            return ret;
        }

        unsafe fn add_loaded_images(ret: &mut Vec<Library>) {
            let snap = CreateToolhelp32Snapshot(TH32CS_SNAPMODULE, 0);
            if snap == INVALID_HANDLE_VALUE {
                return;
            }

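            // Toolhelp requires `dwSize` to be set to the structure's size
            // before `Module32FirstW` will fill in an entry.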
            let mut me = MaybeUninit::<MODULEENTRY32W>::zeroed().assume_init();
            me.dwSize = mem::size_of_val(&me) as DWORD;
            if Module32FirstW(snap, &mut me) == TRUE {
                loop {
                    if let Some(lib) = load_library(&me) {
                        ret.push(lib);
                    }

                    if Module32NextW(snap, &mut me) != TRUE {
                        break;
                    }
                }
            }

            CloseHandle(snap);
        }

        unsafe fn load_library(me: &MODULEENTRY32W) -> Option<Library> {
            let pos = me
                .szExePath
                .iter()
                .position(|i| *i == 0)
                .unwrap_or(me.szExePath.len());
            let name = OsString::from_wide(&me.szExePath[..pos]);

            // MinGW libraries currently don't support ASLR
            // (rust-lang/rust#16514), but DLLs can still be relocated around in
            // the address space. It appears that addresses in debug info are
            // all as-if this library was loaded at its "image base", which is a
            // field in its COFF file headers. Since this is what debuginfo
            // seems to list we parse the symbol table and store addresses as if
            // the library was loaded at "image base" as well.
            //
            // The library may not be loaded at "image base", however
            // (presumably something else may be loaded there?). This is where
            // the `bias` field comes into play, and we need to figure out the
            // value of `bias` here. Unfortunately though it's not clear how to
            // acquire this from a loaded module. What we do have, however, is
            // the actual load address (`modBaseAddr`).
            //
            // As a bit of a cop-out for now we mmap the file, read the file
            // header information, then drop the mmap. This is wasteful because
            // we'll probably reopen the mmap later, but this should work well
            // enough for now.
            //
            // Once we have the `image_base` (desired load location) and the
            // `base_addr` (actual load location) we can fill in the `bias`
            // (difference between the actual and desired) and then the stated
            // address of each segment is the `image_base` since that's what the
            // file says.
            //
            // For now it appears that unlike ELF/MachO we can make do with one
            // segment per library, using `modBaseSize` as the whole size.
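            //
            // As a hypothetical worked example: if the COFF header lists an
            // image_base of 0x0001_4000_0000 but the module actually loaded at
            // modBaseAddr = 0x0001_4123_0000, then
            // bias = 0x0001_4123_0000 - 0x0001_4000_0000 = 0x123_0000, and
            // subtracting `bias` from any real address recovers the address
            // the debuginfo expects.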
            let mmap = mmap(name.as_ref())?;
            let image_base = coff::get_image_base(&mmap)?;
            let base_addr = me.modBaseAddr as usize;
            Some(Library {
                name,
                bias: base_addr.wrapping_sub(image_base),
                segments: vec![LibrarySegment {
                    stated_virtual_memory_address: image_base,
                    len: me.modBaseSize as usize,
                }],
            })
        }
    } else if #[cfg(target_os = "macos")] {
        // macOS uses the Mach-O file format and uses DYLD-specific APIs to
        // load a list of native libraries that are part of the application.

        use std::os::unix::prelude::*;
        use std::ffi::{OsStr, CStr};

        mod macho;
        use self::macho::Object;

        #[allow(deprecated)]
        fn native_libraries() -> Vec<Library> {
            let mut ret = Vec::new();
            let images = unsafe { libc::_dyld_image_count() };
            for i in 0..images {
                ret.extend(native_library(i));
            }
            return ret;
        }

        #[allow(deprecated)]
        fn native_library(i: u32) -> Option<Library> {
            use object::macho;
            use object::read::macho::{MachHeader, Segment};
            use object::{Bytes, NativeEndian};

            // Fetch the name of this library, which corresponds to the path
            // it was loaded from as well.
            let name = unsafe {
                let name = libc::_dyld_get_image_name(i);
                if name.is_null() {
                    return None;
                }
                CStr::from_ptr(name)
            };

            // Load the image header of this library and delegate to `object` to
            // parse all the load commands so we can figure out all the segments
            // involved here.
            let (mut load_commands, endian) = unsafe {
                let header = libc::_dyld_get_image_header(i);
                if header.is_null() {
                    return None;
                }
                match (*header).magic {
                    macho::MH_MAGIC => {
                        let endian = NativeEndian;
                        let header = &*(header as *const macho::MachHeader32<NativeEndian>);
                        let data = core::slice::from_raw_parts(
                            header as *const _ as *const u8,
                            mem::size_of_val(header) + header.sizeofcmds.get(endian) as usize
                        );
                        (header.load_commands(endian, Bytes(data)).ok()?, endian)
                    }
                    macho::MH_MAGIC_64 => {
                        let endian = NativeEndian;
                        let header = &*(header as *const macho::MachHeader64<NativeEndian>);
                        let data = core::slice::from_raw_parts(
                            header as *const _ as *const u8,
                            mem::size_of_val(header) + header.sizeofcmds.get(endian) as usize
                        );
                        (header.load_commands(endian, Bytes(data)).ok()?, endian)
                    }
                    _ => return None,
                }
            };

            // Iterate over the segments and register known regions for segments
            // that we find. Additionally record information about text segments
            // for processing later, see comments below.
            let mut segments = Vec::new();
            let mut first_text = 0;
            let mut text_fileoff_zero = false;
            while let Some(cmd) = load_commands.next().ok()? {
                if let Some((seg, _)) = cmd.segment_32().ok()? {
                    if seg.name() == b"__TEXT" {
                        first_text = segments.len();
                        if seg.fileoff(endian) == 0 && seg.filesize(endian) > 0 {
                            text_fileoff_zero = true;
                        }
                    }
                    segments.push(LibrarySegment {
                        len: seg.vmsize(endian).try_into().ok()?,
                        stated_virtual_memory_address: seg.vmaddr(endian).try_into().ok()?,
                    });
                }
                if let Some((seg, _)) = cmd.segment_64().ok()? {
                    if seg.name() == b"__TEXT" {
                        first_text = segments.len();
                        if seg.fileoff(endian) == 0 && seg.filesize(endian) > 0 {
                            text_fileoff_zero = true;
                        }
                    }
                    segments.push(LibrarySegment {
                        len: seg.vmsize(endian).try_into().ok()?,
                        stated_virtual_memory_address: seg.vmaddr(endian).try_into().ok()?,
                    });
                }
            }

            // Determine the "slide" for this library, which ends up being the
            // bias we use to figure out where in memory objects are loaded.
            // This is a bit of a weird computation though and is the result of
            // trying a few things in the wild and seeing what sticks.
            //
            // The general idea is that the `bias` plus a segment's
            // `stated_virtual_memory_address` is going to be where in the
            // actual address space the segment resides. The other thing we rely
            // on though is that a real address minus the `bias` is the index to
            // look up in the symbol table and debuginfo.
            //
            // It turns out, though, that for system loaded libraries these
            // calculations are incorrect. For native executables, however, it
            // appears correct. Lifting some logic from LLDB's source, it has
            // some special-casing for the first `__TEXT` section loaded from
            // file offset 0 with a nonzero size. For whatever reason when this
            // is present it appears to mean that the symbol table is relative
            // to just the vmaddr slide for the library. If it's *not* present
            // then the symbol table is relative to the vmaddr slide plus the
            // segment's stated address.
            //
            // To handle this situation, if we *don't* find a text section at
            // file offset zero then we increase the bias by the first text
            // section's stated address and decrease all stated addresses by
            // that amount as well. That way the symbol table always appears
            // relative to the library's bias amount. This appears to have the
            // right results for symbolizing via the symbol table.
            //
            // Honestly I'm not entirely sure whether this is right or if
            // there's something else that should indicate how to do this. For
            // now though this seems to work well enough (?) and we should
            // always be able to tweak this over time if necessary.
            //
            // For some more information see #318
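            //
            // Hypothetical worked example: if dyld reports a slide of 0x1000
            // and the first `__TEXT` segment has a stated address of
            // 0x1_0000_0000 with no text section at file offset zero, we end
            // up with bias = 0x1000 + 0x1_0000_0000 and subtract
            // 0x1_0000_0000 from every stated address, so `stated + bias`
            // still yields each segment's real location.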
            let mut slide = unsafe { libc::_dyld_get_image_vmaddr_slide(i) as usize };
            if !text_fileoff_zero {
                let adjust = segments[first_text].stated_virtual_memory_address;
                for segment in segments.iter_mut() {
                    segment.stated_virtual_memory_address -= adjust;
                }
                slide += adjust;
            }

            Some(Library {
                name: OsStr::from_bytes(name.to_bytes()).to_owned(),
                segments,
                bias: slide,
            })
        }
    } else if #[cfg(any(
        target_os = "linux",
        target_os = "fuchsia",
    ))] {
        // Other Unix (e.g. Linux) platforms use ELF as an object file format
        // and typically implement an API called `dl_iterate_phdr` to load
        // native libraries.

        use std::os::unix::prelude::*;
        use std::ffi::{OsStr, CStr};

        mod elf;
        use self::elf::Object;

        fn native_libraries() -> Vec<Library> {
            let mut ret = Vec::new();
            unsafe {
                libc::dl_iterate_phdr(Some(callback), &mut ret as *mut _ as *mut _);
            }
            return ret;
        }

        unsafe extern "C" fn callback(
            info: *mut libc::dl_phdr_info,
            _size: libc::size_t,
            vec: *mut libc::c_void,
        ) -> libc::c_int {
            let libs = &mut *(vec as *mut Vec<Library>);
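            // `dl_iterate_phdr` typically reports the main executable first
            // with a null or empty name, so fall back to `current_exe` for
            // that first unnamed entry only.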
            let name = if (*info).dlpi_name.is_null() || *(*info).dlpi_name == 0 {
                if libs.is_empty() {
                    std::env::current_exe().map(|e| e.into()).unwrap_or_default()
                } else {
                    OsString::new()
                }
            } else {
                let bytes = CStr::from_ptr((*info).dlpi_name).to_bytes();
                OsStr::from_bytes(bytes).to_owned()
            };
            let headers = core::slice::from_raw_parts((*info).dlpi_phdr, (*info).dlpi_phnum as usize);
            libs.push(Library {
                name,
                segments: headers
                    .iter()
                    .map(|header| LibrarySegment {
                        len: (*header).p_memsz as usize,
                        stated_virtual_memory_address: (*header).p_vaddr as usize,
                    })
                    .collect(),
                bias: (*info).dlpi_addr as usize,
            });
            0
        }
    } else {
        // Everything else should use ELF, but doesn't know how to load native
        // libraries.

        mod elf;
        use self::elf::Object;

        fn native_libraries() -> Vec<Library> {
            Vec::new()
        }
    }
}

#[derive(Default)]
struct Cache {
    /// All known shared libraries that have been loaded.
    libraries: Vec<Library>,

    /// Mappings cache where we retain parsed DWARF information.
    ///
    /// This list has a fixed capacity for its entire lifetime which never
    /// increases. The `usize` element of each pair is an index into `libraries`
    /// above, and the `Mapping` is the corresponding parsed DWARF information.
    ///
    /// Note that this is basically an LRU cache and we'll be shifting things
    /// around in here as we symbolize addresses.
    mappings: Vec<(usize, Mapping)>,
}

struct Library {
    name: OsString,
    /// Segments of this library loaded into memory, and where they're loaded.
    segments: Vec<LibrarySegment>,
    /// The "bias" of this library, typically where it's loaded into memory.
    /// This value is added to each segment's stated address to get the actual
    /// virtual memory address that the segment is loaded into. Additionally
    /// this bias is subtracted from real virtual memory addresses to index into
    /// debuginfo and the symbol table.
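    ///
    /// In other words, for a segment whose stated address is `svma`, the
    /// segment actually lives at `avma = svma + bias`, and a real address is
    /// translated back via `svma = avma - bias`.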
    bias: usize,
}

struct LibrarySegment {
    /// The stated address of this segment in the object file. This is not
    /// actually where the segment is loaded, but rather this address plus the
    /// containing library's `bias` is where to find it.
    stated_virtual_memory_address: usize,
    /// The size of this segment in memory.
    len: usize,
}

// unsafe because this is required to be externally synchronized
pub unsafe fn clear_symbol_cache() {
    Cache::with_global(|cache| cache.mappings.clear());
}

impl Cache {
    fn new() -> Cache {
        Cache {
            mappings: Vec::with_capacity(MAPPINGS_CACHE_SIZE),
            libraries: native_libraries(),
        }
    }

    // unsafe because this is required to be externally synchronized
    unsafe fn with_global(f: impl FnOnce(&mut Self)) {
        // A very small, very simple LRU cache for debug info mappings.
        //
        // The hit rate should be very high, since the typical stack doesn't cross
        // between many shared libraries.
        //
        // The `addr2line::Context` structures are pretty expensive to create.
        // Their cost is expected to be amortized by subsequent `locate` queries,
        // which leverage the structures built when constructing
        // `addr2line::Context`s to get nice speedups. If we didn't have this
        // cache, that amortization would never happen, and symbolicating
        // backtraces would be ssssllllooooowwww.
        static mut MAPPINGS_CACHE: Option<Cache> = None;

        f(MAPPINGS_CACHE.get_or_insert_with(|| Cache::new()))
    }

    fn avma_to_svma(&self, addr: *const u8) -> Option<(usize, *const u8)> {
        self.libraries
            .iter()
            .enumerate()
            .filter_map(|(i, lib)| {
                // First up, test if this `lib` has any segment containing the
                // `addr` (handling relocation). If this check passes then we
                // can continue below and actually translate the address.
                //
                // Note that we're using `wrapping_add` here to avoid overflow
                // checks. It's been seen in the wild that the SVMA + bias
                // computation overflows. It seems a bit odd that would happen,
                // but there's not a huge amount we can do about it other than
                // probably just ignore those segments since they're likely
                // pointing off into space. This originally came up in
                // rust-lang/backtrace-rs#329.
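                //
                // Hypothetical example: with bias = 0x1000 and a segment
                // whose svma is 0x4000 with len 0x2000, the segment occupies
                // [0x5000, 0x7000), so an avma of 0x5500 matches and
                // translates to svma 0x5500 - 0x1000 = 0x4500.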
                if !lib.segments.iter().any(|s| {
                    let svma = s.stated_virtual_memory_address;
                    let start = svma.wrapping_add(lib.bias);
                    let end = start.wrapping_add(s.len);
                    let address = addr as usize;
                    start <= address && address < end
                }) {
                    return None;
                }

                // Now that we know `lib` contains `addr`, we can offset with
                // the bias to find the stated virtual memory address.
                let svma = (addr as usize).wrapping_sub(lib.bias);
                Some((i, svma as *const u8))
            })
            .next()
    }

    fn mapping_for_lib<'a>(&'a mut self, lib: usize) -> Option<&'a Context<'a>> {
        let idx = self.mappings.iter().position(|(idx, _)| *idx == lib);

        // Invariant: after this conditional completes without early returning
        // from an error, the cache entry for this path is at index 0.

        if let Some(idx) = idx {
            // When the mapping is already in the cache, move it to the front.
            if idx != 0 {
                let entry = self.mappings.remove(idx);
                self.mappings.insert(0, entry);
            }
        } else {
            // When the mapping is not in the cache, create a new mapping,
            // insert it into the front of the cache, and evict the oldest cache
            // entry if necessary.
            let name = &self.libraries[lib].name;
            let mapping = Mapping::new(name.as_ref())?;

            if self.mappings.len() == MAPPINGS_CACHE_SIZE {
                self.mappings.pop();
            }

            self.mappings.insert(0, (lib, mapping));
        }

        let cx: &'a Context<'static> = &self.mappings[0].1.cx;
        // don't leak the `'static` lifetime, make sure it's scoped to just
        // ourselves
        Some(unsafe { mem::transmute::<&'a Context<'static>, &'a Context<'a>>(cx) })
    }
}

pub unsafe fn resolve(what: ResolveWhat<'_>, cb: &mut dyn FnMut(&super::Symbol)) {
    let addr = what.address_or_ip();
    let mut call = |sym: Symbol<'_>| {
        // Extend the lifetime of `sym` to `'static` since we are unfortunately
        // required to here, but it's only ever going out as a reference, so no
        // reference to it should be persisted beyond this frame anyway.
        let sym = mem::transmute::<Symbol<'_>, Symbol<'static>>(sym);
        (cb)(&super::Symbol { inner: sym });
    };

    Cache::with_global(|cache| {
        let (lib, addr) = match cache.avma_to_svma(addr as *const u8) {
            Some(pair) => pair,
            None => return,
        };

        // Finally, get a cached mapping or create a new mapping for this file,
        // and evaluate the DWARF info to find the file/line/name for this
        // address.
        let cx = match cache.mapping_for_lib(lib) {
            Some(cx) => cx,
            None => return,
        };
        let mut any_frames = false;
        if let Ok(mut frames) = cx.dwarf.find_frames(addr as u64) {
            while let Ok(Some(frame)) = frames.next() {
                any_frames = true;
                call(Symbol::Frame {
                    addr: addr as *mut c_void,
                    location: frame.location,
                    name: frame.function.map(|f| f.name.slice()),
                });
            }
        }

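        // If DWARF produced no frames for this address (e.g. the library
        // carries no debug info), fall back to the object file's symbol table.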
        if !any_frames {
            if let Some(name) = cx.object.search_symtab(addr as u64) {
                call(Symbol::Symtab {
                    addr: addr as *mut c_void,
                    name,
                });
            }
        }
    });
}

pub enum Symbol<'a> {
    /// We were able to locate frame information for this symbol, and
    /// `addr2line`'s frame internally has all the nitty gritty details.
    Frame {
        addr: *mut c_void,
        location: Option<addr2line::Location<'a>>,
        name: Option<&'a [u8]>,
    },
    /// Couldn't find debug information, but we did find this symbol in the
    /// symbol table of the object file (e.g. the ELF executable).
    Symtab { addr: *mut c_void, name: &'a [u8] },
}

impl Symbol<'_> {
    pub fn name(&self) -> Option<SymbolName<'_>> {
        match self {
            Symbol::Frame { name, .. } => {
                let name = name.as_ref()?;
                Some(SymbolName::new(name))
            }
            Symbol::Symtab { name, .. } => Some(SymbolName::new(name)),
        }
    }

    pub fn addr(&self) -> Option<*mut c_void> {
        match self {
            Symbol::Frame { addr, .. } => Some(*addr),
            Symbol::Symtab { .. } => None,
        }
    }

    pub fn filename_raw(&self) -> Option<BytesOrWideString<'_>> {
        match self {
            Symbol::Frame { location, .. } => {
                let file = location.as_ref()?.file?;
                Some(BytesOrWideString::Bytes(file.as_bytes()))
            }
            Symbol::Symtab { .. } => None,
        }
    }

    pub fn filename(&self) -> Option<&Path> {
        match self {
            Symbol::Frame { location, .. } => {
                let file = location.as_ref()?.file?;
                Some(Path::new(file))
            }
            Symbol::Symtab { .. } => None,
        }
    }

    pub fn lineno(&self) -> Option<u32> {
        match self {
            Symbol::Frame { location, .. } => location.as_ref()?.line,
            Symbol::Symtab { .. } => None,
        }
    }
}