1 //! This crate can parse a C++ “mangled” linker symbol name into a Rust value
2 //! describing what the name refers to: a variable, a function, a virtual table,
3 //! etc. The description type implements `Display`, producing human-readable
4 //! text describing the mangled name. Debuggers and profilers can use this crate
5 //! to provide more meaningful output.
6 //!
7 //! C++ requires the compiler to choose names for linker symbols consistently
8 //! across compilation units, so that two compilation units that have seen the
9 //! same declarations can pair up definitions in one unit with references in
10 //! another.  Almost all platforms other than Microsoft Windows follow the
11 //! [Itanium C++ ABI][itanium]'s rules for this.
12 //!
13 //! [itanium]: http://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangle
14 //!
15 //! For example, suppose a C++ compilation unit has the definition:
16 //!
17 //! ```c++
18 //! namespace space {
19 //!   int foo(int x, int y) { return x+y; }
20 //! }
21 //! ```
22 //!
23 //! The Itanium C++ ABI specifies that the linker symbol for that function must
24 //! be named `_ZN5space3fooEii`. This crate can parse that name into a Rust
25 //! value representing its structure. Formatting the value with the `format!`
26 //! macro or the `std::string::ToString::to_string` trait method yields the
27 //! string `space::foo(int, int)`, which is more meaningful to the C++
28 //! developer.
29 
30 #![deny(missing_docs)]
31 #![deny(missing_debug_implementations)]
32 #![deny(unsafe_code)]
33 // Clippy stuff.
34 #![allow(unknown_lints)]
35 #![allow(clippy::inline_always)]
36 #![allow(clippy::redundant_field_names)]
37 #![cfg_attr(all(not(feature = "std"), feature = "alloc"), no_std)]
38 #![cfg_attr(all(not(feature = "std"), feature = "alloc"), feature(alloc))]
39 
40 #[macro_use]
41 extern crate cfg_if;
42 
// Choose where the heap-backed standard modules come from. The rest of this
// file names them through `imports` (see `use imports::*` below), so the same
// paths work whether the crate is built against `std` or `core` + `alloc`.
cfg_if! {
    if #[cfg(all(not(feature = "std"), feature = "alloc"))] {
        // Build without `std`: alias `core` as `std` so paths such as
        // `std::fmt` still resolve, and take the allocating types from `alloc`.
        extern crate core as std;
        #[macro_use]
        extern crate alloc;
        mod imports {
            pub use alloc::boxed;
            pub use alloc::vec;
            pub use alloc::string;
            pub use alloc::borrow;
            pub use alloc::collections::btree_map;
        }
    } else {
        // Any other feature combination: re-export the same set of modules
        // directly from `std`.
        mod imports {
            pub use std::boxed;
            pub use std::vec;
            pub use std::string;
            pub use std::borrow;
            pub use std::collections::btree_map;
        }
    }
}
65 
66 use imports::*;
67 
68 use string::String;
69 use vec::Vec;
70 
71 #[macro_use]
72 mod logging;
73 
74 pub mod ast;
75 pub mod error;
76 mod index_str;
77 mod subs;
78 
79 use ast::{Demangle, Parse, ParseContext};
80 use error::{Error, Result};
81 use index_str::IndexStr;
82 use std::fmt;
83 use std::num::NonZeroU32;
84 
/// Options to control the parsing process.
///
/// Start from `ParseOptions::default()` and chain the setters. Each setter
/// consumes the options by value and returns the updated copy, so the return
/// value must be kept.
#[derive(Clone, Copy, Debug, Default)]
#[repr(C)]
pub struct ParseOptions {
    /// Explicitly requested recursion limit; `None` until
    /// `recursion_limit` has been called.
    recursion_limit: Option<NonZeroU32>,
}

impl ParseOptions {
    /// Set the limit on recursion depth during the parsing phase. A low
    /// limit will cause valid symbols to be rejected, but a high limit may
    /// allow pathological symbols to overflow the stack during parsing.
    /// The default value is 96, which will not overflow the stack even in
    /// a debug build.
    ///
    /// # Panics
    ///
    /// Panics if `limit` is zero.
    #[must_use = "this returns the updated options and does not modify the original in place"]
    pub fn recursion_limit(mut self, limit: u32) -> Self {
        // A limit of zero could never parse anything, so it is treated as a
        // caller bug rather than stored.
        let limit = NonZeroU32::new(limit).expect("Recursion limit must be > 0");
        self.recursion_limit = Some(limit);
        self
    }
}
103 
/// Options to control the demangling process.
///
/// Start from `DemangleOptions::new()` (or `default()`) and chain the
/// setters. Each setter consumes the options by value and returns the updated
/// copy, so the return value must be kept.
#[derive(Clone, Copy, Debug, Default)]
#[repr(C)]
pub struct DemangleOptions {
    /// Set by `no_params`: do not display function arguments.
    no_params: bool,
    /// Set by `no_return_type`: do not display the function return type.
    no_return_type: bool,
    /// Explicitly requested recursion limit; `None` until
    /// `recursion_limit` has been called.
    recursion_limit: Option<NonZeroU32>,
}

impl DemangleOptions {
    /// Construct a new `DemangleOptions` with the default values.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Do not display function arguments.
    #[must_use = "this returns the updated options and does not modify the original in place"]
    pub fn no_params(mut self) -> Self {
        self.no_params = true;
        self
    }

    /// Do not display the function return type.
    #[must_use = "this returns the updated options and does not modify the original in place"]
    pub fn no_return_type(mut self) -> Self {
        self.no_return_type = true;
        self
    }

    /// Set the limit on recursion depth during the demangling phase. A low
    /// limit will cause valid symbols to be rejected, but a high limit may
    /// allow pathological symbols to overflow the stack during demangling.
    /// The default value is 128.
    ///
    /// # Panics
    ///
    /// Panics if `limit` is zero.
    #[must_use = "this returns the updated options and does not modify the original in place"]
    pub fn recursion_limit(mut self, limit: u32) -> Self {
        // A limit of zero could never demangle anything, so it is treated as
        // a caller bug rather than stored.
        let limit = NonZeroU32::new(limit).expect("Recursion limit must be > 0");
        self.recursion_limit = Some(limit);
        self
    }
}
140 
/// A `Symbol` which owns the underlying storage for the mangled name.
///
/// The raw bytes are kept in a `Vec<u8>`, so the symbol holds no borrow of
/// the buffer the mangled name originally came from.
pub type OwnedSymbol = Symbol<Vec<u8>>;

/// A `Symbol` which borrows the underlying storage for the mangled name.
///
/// The raw bytes are a `&'a [u8]`, so the symbol cannot outlive the buffer
/// that holds the mangled name.
pub type BorrowedSymbol<'a> = Symbol<&'a [u8]>;
146 
/// A mangled symbol that has been parsed into an AST.
///
/// This is generic over some storage type `T` which can be either owned or
/// borrowed. See the `OwnedSymbol` and `BorrowedSymbol` type aliases.
///
/// Formatting a `Symbol` with `Display` produces the demangled text using the
/// default `DemangleOptions`; use `Symbol::demangle` to pass other options.
#[derive(Clone, Debug, PartialEq)]
pub struct Symbol<T> {
    /// The mangled name exactly as it was handed to the parser.
    raw: T,
    /// Substitution table filled in while parsing `raw`; it is handed back to
    /// the demangler together with the AST.
    substitutions: subs::SubstitutionTable,
    /// Root of the AST produced by parsing `raw`.
    parsed: ast::MangledName,
}
157 
158 impl<T> Symbol<T>
159 where
160     T: AsRef<[u8]>,
161 {
162     /// Given some raw storage, parse the mangled symbol from it with the default
163     /// options.
164     ///
165     /// ```
166     /// use cpp_demangle::Symbol;
167     /// use std::string::ToString;
168     ///
169     /// // First, something easy :)
170     ///
171     /// let mangled = b"_ZN5space3fooEibc";
172     ///
173     /// let sym = Symbol::new(&mangled[..])
174     ///     .expect("Could not parse mangled symbol!");
175     ///
176     /// let demangled = sym.to_string();
177     /// assert_eq!(demangled, "space::foo(int, bool, char)");
178     ///
179     /// // Now let's try something a little more complicated!
180     ///
181     /// let mangled =
182     ///     b"__Z28JS_GetPropertyDescriptorByIdP9JSContextN2JS6HandleIP8JSObjectEENS2_I4jsidEENS1_13MutableHandleINS1_18PropertyDescriptorEEE";
183     ///
184     /// let sym = Symbol::new(&mangled[..])
185     ///     .expect("Could not parse mangled symbol!");
186     ///
187     /// let demangled = sym.to_string();
188     /// assert_eq!(
189     ///     demangled,
190     ///     "JS_GetPropertyDescriptorById(JSContext*, JS::Handle<JSObject*>, JS::Handle<jsid>, JS::MutableHandle<JS::PropertyDescriptor>)"
191     /// );
192     /// ```
193     #[inline]
new(raw: T) -> Result<Symbol<T>>194     pub fn new(raw: T) -> Result<Symbol<T>> {
195         Self::new_with_options(raw, &Default::default())
196     }
197 
198     /// Given some raw storage, parse the mangled symbol from it.
199     ///
200     /// ```
201     /// use cpp_demangle::{ParseOptions, Symbol};
202     /// use std::string::ToString;
203     ///
204     /// // First, something easy :)
205     ///
206     /// let mangled = b"_ZN5space3fooEibc";
207     ///
208     /// let parse_options = ParseOptions::default()
209     ///     .recursion_limit(1024);
210     ///
211     /// let sym = Symbol::new_with_options(&mangled[..], &parse_options)
212     ///     .expect("Could not parse mangled symbol!");
213     ///
214     /// let demangled = sym.to_string();
215     /// assert_eq!(demangled, "space::foo(int, bool, char)");
216     ///
217     /// // Now let's try something a little more complicated!
218     ///
219     /// let mangled =
220     ///     b"__Z28JS_GetPropertyDescriptorByIdP9JSContextN2JS6HandleIP8JSObjectEENS2_I4jsidEENS1_13MutableHandleINS1_18PropertyDescriptorEEE";
221     ///
222     /// let sym = Symbol::new(&mangled[..])
223     ///     .expect("Could not parse mangled symbol!");
224     ///
225     /// let demangled = sym.to_string();
226     /// assert_eq!(
227     ///     demangled,
228     ///     "JS_GetPropertyDescriptorById(JSContext*, JS::Handle<JSObject*>, JS::Handle<jsid>, JS::MutableHandle<JS::PropertyDescriptor>)"
229     /// );
230     /// ```
new_with_options(raw: T, options: &ParseOptions) -> Result<Symbol<T>>231     pub fn new_with_options(raw: T, options: &ParseOptions) -> Result<Symbol<T>> {
232         let mut substitutions = subs::SubstitutionTable::new();
233 
234         let parsed = {
235             let ctx = ParseContext::new(*options);
236             let input = IndexStr::new(raw.as_ref());
237 
238             let (parsed, tail) = ast::MangledName::parse(&ctx, &mut substitutions, input)?;
239             debug_assert!(ctx.recursion_level() == 0);
240 
241             if tail.is_empty() {
242                 parsed
243             } else {
244                 return Err(Error::UnexpectedText);
245             }
246         };
247 
248         let symbol = Symbol {
249             raw: raw,
250             substitutions: substitutions,
251             parsed: parsed,
252         };
253 
254         log!(
255             "Successfully parsed '{}' as
256 
257 AST = {:#?}
258 
259 substitutions = {:#?}",
260             String::from_utf8_lossy(symbol.raw.as_ref()),
261             symbol.parsed,
262             symbol.substitutions
263         );
264 
265         Ok(symbol)
266     }
267 
268     /// Demangle the symbol and return it as a String.
269     ///
270     /// Unlike the `ToString` implementation, this function allows options to
271     /// be specified.
272     ///
273     /// ```
274     /// use cpp_demangle::{DemangleOptions, Symbol};
275     /// use std::string::ToString;
276     ///
277     /// let mangled = b"_ZN5space3fooEibc";
278     ///
279     /// let sym = Symbol::new(&mangled[..])
280     ///     .expect("Could not parse mangled symbol!");
281     ///
282     /// let demangled = sym.to_string();
283     /// let options = DemangleOptions::default();
284     /// let demangled_again = sym.demangle(&options).unwrap();
285     /// assert_eq!(demangled_again, demangled);
286     /// ```
287     #[allow(clippy::trivially_copy_pass_by_ref)]
demangle(&self, options: &DemangleOptions) -> ::std::result::Result<String, fmt::Error>288     pub fn demangle(&self, options: &DemangleOptions) -> ::std::result::Result<String, fmt::Error> {
289         let mut out = String::new();
290         {
291             let mut ctx = ast::DemangleContext::new(
292                 &self.substitutions,
293                 self.raw.as_ref(),
294                 *options,
295                 &mut out,
296             );
297             self.parsed.demangle(&mut ctx, None)?;
298         }
299 
300         Ok(out)
301     }
302 
303     /// Demangle the symbol to a DemangleWrite, which lets the consumer be informed about
304     /// syntactic structure.
305     #[allow(clippy::trivially_copy_pass_by_ref)]
structured_demangle<W: DemangleWrite>( &self, out: &mut W, options: &DemangleOptions, ) -> fmt::Result306     pub fn structured_demangle<W: DemangleWrite>(
307         &self,
308         out: &mut W,
309         options: &DemangleOptions,
310     ) -> fmt::Result {
311         let mut ctx =
312             ast::DemangleContext::new(&self.substitutions, self.raw.as_ref(), *options, out);
313         self.parsed.demangle(&mut ctx, None)
314     }
315 }
316 
/// The type of a demangled AST node.
///
/// This is only partial, not all nodes are represented. Values of this type
/// are passed to `DemangleWrite::push_demangle_node`.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub enum DemangleNodeType {
    /// Entering a `<prefix>` production
    Prefix,
    /// Entering a `<template-prefix>` production
    TemplatePrefix,
    /// Entering a `<template-args>` production
    TemplateArgs,
    /// Entering a `<unqualified-name>` production
    UnqualifiedName,
    /// Entering a `<template-param>` production
    TemplateParam,
    /// Entering a `<decltype>` production
    Decltype,
    /// Entering a `<data-member-prefix>` production
    DataMemberPrefix,
    /// Entering a `<nested-name>` production
    NestedName,
    /// Entering a `<special-name>` production that is a vtable.
    VirtualTable,
    /// Additional values may be added in the future. Use a
    /// `_` pattern for compatibility.
    __NonExhaustive,
}
343 
/// Sink for demangled text that reports syntactic structure.
///
/// Only `write_string` has to be implemented. The two node hooks default to
/// doing nothing, so a sink that only wants the text can ignore them.
pub trait DemangleWrite {
    /// Called when we are entering the scope of some AST node.
    ///
    /// The default implementation does nothing.
    fn push_demangle_node(&mut self, _: DemangleNodeType) {}
    /// Same as `fmt::Write::write_str`.
    fn write_string(&mut self, s: &str) -> fmt::Result;
    /// Called when we are exiting the scope of some AST node for
    /// which `push_demangle_node` was called.
    ///
    /// The default implementation does nothing.
    fn pop_demangle_node(&mut self) {}
}
354 
355 impl<W: fmt::Write> DemangleWrite for W {
write_string(&mut self, s: &str) -> fmt::Result356     fn write_string(&mut self, s: &str) -> fmt::Result {
357         fmt::Write::write_str(self, s)
358     }
359 }
360 
361 impl<'a, T> Symbol<&'a T>
362 where
363     T: AsRef<[u8]> + ?Sized,
364 {
365     /// Parse a mangled symbol from input and return it and the trailing tail of
366     /// bytes that come after the symbol, with the default options.
367     ///
368     /// While `Symbol::new` will return an error if there is unexpected trailing
369     /// bytes, `with_tail` simply returns the trailing bytes along with the
370     /// parsed symbol.
371     ///
372     /// ```
373     /// use cpp_demangle::BorrowedSymbol;
374     /// use std::string::ToString;
375     ///
376     /// let mangled = b"_ZN5space3fooEibc and some trailing junk";
377     ///
378     /// let (sym, tail) = BorrowedSymbol::with_tail(&mangled[..])
379     ///     .expect("Could not parse mangled symbol!");
380     ///
381     /// assert_eq!(tail, b" and some trailing junk");
382     ///
383     /// let demangled = sym.to_string();
384     /// assert_eq!(demangled, "space::foo(int, bool, char)");
385     /// ```
386     #[inline]
with_tail(input: &'a T) -> Result<(BorrowedSymbol<'a>, &'a [u8])>387     pub fn with_tail(input: &'a T) -> Result<(BorrowedSymbol<'a>, &'a [u8])> {
388         Self::with_tail_and_options(input, &Default::default())
389     }
390 
391     /// Parse a mangled symbol from input and return it and the trailing tail of
392     /// bytes that come after the symbol.
393     ///
394     /// While `Symbol::new_with_options` will return an error if there is
395     /// unexpected trailing bytes, `with_tail_and_options` simply returns the
396     /// trailing bytes along with the parsed symbol.
397     ///
398     /// ```
399     /// use cpp_demangle::{BorrowedSymbol, ParseOptions};
400     /// use std::string::ToString;
401     ///
402     /// let mangled = b"_ZN5space3fooEibc and some trailing junk";
403     ///
404     /// let parse_options = ParseOptions::default()
405     ///     .recursion_limit(1024);
406     ///
407     /// let (sym, tail) = BorrowedSymbol::with_tail_and_options(&mangled[..], &parse_options)
408     ///     .expect("Could not parse mangled symbol!");
409     ///
410     /// assert_eq!(tail, b" and some trailing junk");
411     ///
412     /// let demangled = sym.to_string();
413     /// assert_eq!(demangled, "space::foo(int, bool, char)");
414     /// ```
with_tail_and_options( input: &'a T, options: &ParseOptions, ) -> Result<(BorrowedSymbol<'a>, &'a [u8])>415     pub fn with_tail_and_options(
416         input: &'a T,
417         options: &ParseOptions,
418     ) -> Result<(BorrowedSymbol<'a>, &'a [u8])> {
419         let mut substitutions = subs::SubstitutionTable::new();
420 
421         let ctx = ParseContext::new(*options);
422         let idx_str = IndexStr::new(input.as_ref());
423         let (parsed, tail) = ast::MangledName::parse(&ctx, &mut substitutions, idx_str)?;
424         debug_assert!(ctx.recursion_level() == 0);
425 
426         let symbol = Symbol {
427             raw: input.as_ref(),
428             substitutions: substitutions,
429             parsed: parsed,
430         };
431 
432         log!(
433             "Successfully parsed '{}' as
434 
435 AST = {:#?}
436 
437 substitutions = {:#?}",
438             String::from_utf8_lossy(symbol.raw),
439             symbol.parsed,
440             symbol.substitutions
441         );
442 
443         Ok((symbol, tail.into()))
444     }
445 }
446 
447 impl<T> fmt::Display for Symbol<T>
448 where
449     T: AsRef<[u8]>,
450 {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result451     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
452         let mut out = String::new();
453         {
454             let options = DemangleOptions::default();
455             let mut ctx = ast::DemangleContext::new(
456                 &self.substitutions,
457                 self.raw.as_ref(),
458                 options,
459                 &mut out,
460             );
461             self.parsed.demangle(&mut ctx, None).map_err(|err| {
462                 log!("Demangling error: {:#?}", err);
463                 fmt::Error
464             })?;
465         }
466         write!(f, "{}", &out)
467     }
468 }
469