1 //! This crate can parse a C++ “mangled” linker symbol name into a Rust value 2 //! describing what the name refers to: a variable, a function, a virtual table, 3 //! etc. The description type implements `Display`, producing human-readable 4 //! text describing the mangled name. Debuggers and profilers can use this crate 5 //! to provide more meaningful output. 6 //! 7 //! C++ requires the compiler to choose names for linker symbols consistently 8 //! across compilation units, so that two compilation units that have seen the 9 //! same declarations can pair up definitions in one unit with references in 10 //! another. Almost all platforms other than Microsoft Windows follow the 11 //! [Itanium C++ ABI][itanium]'s rules for this. 12 //! 13 //! [itanium]: http://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangle 14 //! 15 //! For example, suppose a C++ compilation unit has the definition: 16 //! 17 //! ```c++ 18 //! namespace space { 19 //! int foo(int x, int y) { return x+y; } 20 //! } 21 //! ``` 22 //! 23 //! The Itanium C++ ABI specifies that the linker symbol for that function must 24 //! be named `_ZN5space3fooEii`. This crate can parse that name into a Rust 25 //! value representing its structure. Formatting the value with the `format!` 26 //! macro or the `std::string::ToString::to_string` trait method yields the 27 //! string `space::foo(int, int)`, which is more meaningful to the C++ 28 //! developer. 29 30 #![deny(missing_docs)] 31 #![deny(missing_debug_implementations)] 32 #![deny(unsafe_code)] 33 // Clippy stuff. 34 #![allow(unknown_lints)] 35 #![allow(clippy::inline_always)] 36 #![allow(clippy::redundant_field_names)] 37 #![cfg_attr(all(not(feature = "std"), feature = "alloc"), no_std)] 38 #![cfg_attr(all(not(feature = "std"), feature = "alloc"), feature(alloc))] 39 40 #[macro_use] 41 extern crate cfg_if; 42 43 cfg_if! { 44 if #[cfg(all(not(feature = "std"), feature = "alloc"))] { 45 extern crate core as std; 46 #[macro_use] 47 extern crate alloc; 48 mod imports { 49 pub use alloc::boxed; 50 pub use alloc::vec; 51 pub use alloc::string; 52 pub use alloc::borrow; 53 pub use alloc::collections::btree_map; 54 } 55 } else { 56 mod imports { 57 pub use std::boxed; 58 pub use std::vec; 59 pub use std::string; 60 pub use std::borrow; 61 pub use std::collections::btree_map; 62 } 63 } 64 } 65 66 use imports::*; 67 68 use string::String; 69 use vec::Vec; 70 71 #[macro_use] 72 mod logging; 73 74 pub mod ast; 75 pub mod error; 76 mod index_str; 77 mod subs; 78 79 use ast::{Demangle, Parse, ParseContext}; 80 use error::{Error, Result}; 81 use index_str::IndexStr; 82 use std::fmt; 83 use std::num::NonZeroU32; 84 85 /// Options to control the parsing process. 86 #[derive(Clone, Copy, Debug, Default)] 87 #[repr(C)] 88 pub struct ParseOptions { 89 recursion_limit: Option<NonZeroU32>, 90 } 91 92 impl ParseOptions { 93 /// Set the limit on recursion depth during the parsing phase. A low 94 /// limit will cause valid symbols to be rejected, but a high limit may 95 /// allow pathological symbols to overflow the stack during parsing. 96 /// The default value is 96, which will not overflow the stack even in 97 /// a debug build. recursion_limit(mut self, limit: u32) -> Self98 pub fn recursion_limit(mut self, limit: u32) -> Self { 99 self.recursion_limit = Some(NonZeroU32::new(limit).expect("Recursion limit must be > 0")); 100 self 101 } 102 } 103 104 /// Options to control the demangling process. 105 #[derive(Clone, Copy, Debug, Default)] 106 #[repr(C)] 107 pub struct DemangleOptions { 108 no_params: bool, 109 no_return_type: bool, 110 recursion_limit: Option<NonZeroU32>, 111 } 112 113 impl DemangleOptions { 114 /// Construct a new `DemangleOptions` with the default values. new() -> Self115 pub fn new() -> Self { 116 Default::default() 117 } 118 119 /// Do not display function arguments. no_params(mut self) -> Self120 pub fn no_params(mut self) -> Self { 121 self.no_params = true; 122 self 123 } 124 125 /// Do not display the function return type. no_return_type(mut self) -> Self126 pub fn no_return_type(mut self) -> Self { 127 self.no_return_type = true; 128 self 129 } 130 131 /// Set the limit on recursion depth during the demangling phase. A low 132 /// limit will cause valid symbols to be rejected, but a high limit may 133 /// allow pathological symbols to overflow the stack during demangling. 134 /// The default value is 128. recursion_limit(mut self, limit: u32) -> Self135 pub fn recursion_limit(mut self, limit: u32) -> Self { 136 self.recursion_limit = Some(NonZeroU32::new(limit).expect("Recursion limit must be > 0")); 137 self 138 } 139 } 140 141 /// A `Symbol` which owns the underlying storage for the mangled name. 142 pub type OwnedSymbol = Symbol<Vec<u8>>; 143 144 /// A `Symbol` which borrows the underlying storage for the mangled name. 145 pub type BorrowedSymbol<'a> = Symbol<&'a [u8]>; 146 147 /// A mangled symbol that has been parsed into an AST. 148 /// 149 /// This is generic over some storage type `T` which can be either owned or 150 /// borrowed. See the `OwnedSymbol` and `BorrowedSymbol` type aliases. 151 #[derive(Clone, Debug, PartialEq)] 152 pub struct Symbol<T> { 153 raw: T, 154 substitutions: subs::SubstitutionTable, 155 parsed: ast::MangledName, 156 } 157 158 impl<T> Symbol<T> 159 where 160 T: AsRef<[u8]>, 161 { 162 /// Given some raw storage, parse the mangled symbol from it with the default 163 /// options. 164 /// 165 /// ``` 166 /// use cpp_demangle::Symbol; 167 /// use std::string::ToString; 168 /// 169 /// // First, something easy :) 170 /// 171 /// let mangled = b"_ZN5space3fooEibc"; 172 /// 173 /// let sym = Symbol::new(&mangled[..]) 174 /// .expect("Could not parse mangled symbol!"); 175 /// 176 /// let demangled = sym.to_string(); 177 /// assert_eq!(demangled, "space::foo(int, bool, char)"); 178 /// 179 /// // Now let's try something a little more complicated! 180 /// 181 /// let mangled = 182 /// b"__Z28JS_GetPropertyDescriptorByIdP9JSContextN2JS6HandleIP8JSObjectEENS2_I4jsidEENS1_13MutableHandleINS1_18PropertyDescriptorEEE"; 183 /// 184 /// let sym = Symbol::new(&mangled[..]) 185 /// .expect("Could not parse mangled symbol!"); 186 /// 187 /// let demangled = sym.to_string(); 188 /// assert_eq!( 189 /// demangled, 190 /// "JS_GetPropertyDescriptorById(JSContext*, JS::Handle<JSObject*>, JS::Handle<jsid>, JS::MutableHandle<JS::PropertyDescriptor>)" 191 /// ); 192 /// ``` 193 #[inline] new(raw: T) -> Result<Symbol<T>>194 pub fn new(raw: T) -> Result<Symbol<T>> { 195 Self::new_with_options(raw, &Default::default()) 196 } 197 198 /// Given some raw storage, parse the mangled symbol from it. 199 /// 200 /// ``` 201 /// use cpp_demangle::{ParseOptions, Symbol}; 202 /// use std::string::ToString; 203 /// 204 /// // First, something easy :) 205 /// 206 /// let mangled = b"_ZN5space3fooEibc"; 207 /// 208 /// let parse_options = ParseOptions::default() 209 /// .recursion_limit(1024); 210 /// 211 /// let sym = Symbol::new_with_options(&mangled[..], &parse_options) 212 /// .expect("Could not parse mangled symbol!"); 213 /// 214 /// let demangled = sym.to_string(); 215 /// assert_eq!(demangled, "space::foo(int, bool, char)"); 216 /// 217 /// // Now let's try something a little more complicated! 218 /// 219 /// let mangled = 220 /// b"__Z28JS_GetPropertyDescriptorByIdP9JSContextN2JS6HandleIP8JSObjectEENS2_I4jsidEENS1_13MutableHandleINS1_18PropertyDescriptorEEE"; 221 /// 222 /// let sym = Symbol::new(&mangled[..]) 223 /// .expect("Could not parse mangled symbol!"); 224 /// 225 /// let demangled = sym.to_string(); 226 /// assert_eq!( 227 /// demangled, 228 /// "JS_GetPropertyDescriptorById(JSContext*, JS::Handle<JSObject*>, JS::Handle<jsid>, JS::MutableHandle<JS::PropertyDescriptor>)" 229 /// ); 230 /// ``` new_with_options(raw: T, options: &ParseOptions) -> Result<Symbol<T>>231 pub fn new_with_options(raw: T, options: &ParseOptions) -> Result<Symbol<T>> { 232 let mut substitutions = subs::SubstitutionTable::new(); 233 234 let parsed = { 235 let ctx = ParseContext::new(*options); 236 let input = IndexStr::new(raw.as_ref()); 237 238 let (parsed, tail) = ast::MangledName::parse(&ctx, &mut substitutions, input)?; 239 debug_assert!(ctx.recursion_level() == 0); 240 241 if tail.is_empty() { 242 parsed 243 } else { 244 return Err(Error::UnexpectedText); 245 } 246 }; 247 248 let symbol = Symbol { 249 raw: raw, 250 substitutions: substitutions, 251 parsed: parsed, 252 }; 253 254 log!( 255 "Successfully parsed '{}' as 256 257 AST = {:#?} 258 259 substitutions = {:#?}", 260 String::from_utf8_lossy(symbol.raw.as_ref()), 261 symbol.parsed, 262 symbol.substitutions 263 ); 264 265 Ok(symbol) 266 } 267 268 /// Demangle the symbol and return it as a String. 269 /// 270 /// Unlike the `ToString` implementation, this function allows options to 271 /// be specified. 272 /// 273 /// ``` 274 /// use cpp_demangle::{DemangleOptions, Symbol}; 275 /// use std::string::ToString; 276 /// 277 /// let mangled = b"_ZN5space3fooEibc"; 278 /// 279 /// let sym = Symbol::new(&mangled[..]) 280 /// .expect("Could not parse mangled symbol!"); 281 /// 282 /// let demangled = sym.to_string(); 283 /// let options = DemangleOptions::default(); 284 /// let demangled_again = sym.demangle(&options).unwrap(); 285 /// assert_eq!(demangled_again, demangled); 286 /// ``` 287 #[allow(clippy::trivially_copy_pass_by_ref)] demangle(&self, options: &DemangleOptions) -> ::std::result::Result<String, fmt::Error>288 pub fn demangle(&self, options: &DemangleOptions) -> ::std::result::Result<String, fmt::Error> { 289 let mut out = String::new(); 290 { 291 let mut ctx = ast::DemangleContext::new( 292 &self.substitutions, 293 self.raw.as_ref(), 294 *options, 295 &mut out, 296 ); 297 self.parsed.demangle(&mut ctx, None)?; 298 } 299 300 Ok(out) 301 } 302 303 /// Demangle the symbol to a DemangleWrite, which lets the consumer be informed about 304 /// syntactic structure. 305 #[allow(clippy::trivially_copy_pass_by_ref)] structured_demangle<W: DemangleWrite>( &self, out: &mut W, options: &DemangleOptions, ) -> fmt::Result306 pub fn structured_demangle<W: DemangleWrite>( 307 &self, 308 out: &mut W, 309 options: &DemangleOptions, 310 ) -> fmt::Result { 311 let mut ctx = 312 ast::DemangleContext::new(&self.substitutions, self.raw.as_ref(), *options, out); 313 self.parsed.demangle(&mut ctx, None) 314 } 315 } 316 317 /// The type of a demangled AST node. 318 /// This is only partial, not all nodes are represented. 319 #[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] 320 pub enum DemangleNodeType { 321 /// Entering a <prefix> production 322 Prefix, 323 /// Entering a <template-prefix> production 324 TemplatePrefix, 325 /// Entering a <template-args> production 326 TemplateArgs, 327 /// Entering a <unqualified-name> production 328 UnqualifiedName, 329 /// Entering a <template-param> production 330 TemplateParam, 331 /// Entering a <decltype> production 332 Decltype, 333 /// Entering a <data-member-prefix> production 334 DataMemberPrefix, 335 /// Entering a <nested-name> production 336 NestedName, 337 /// Entering a <special-name> production that is a vtable. 338 VirtualTable, 339 /// Additional values may be added in the future. Use a 340 /// _ pattern for compatibility. 341 __NonExhaustive, 342 } 343 344 /// Sink for demangled text that reports syntactic structure. 345 pub trait DemangleWrite { 346 /// Called when we are entering the scope of some AST node. push_demangle_node(&mut self, _: DemangleNodeType)347 fn push_demangle_node(&mut self, _: DemangleNodeType) {} 348 /// Same as `fmt::Write::write_str`. write_string(&mut self, s: &str) -> fmt::Result349 fn write_string(&mut self, s: &str) -> fmt::Result; 350 /// Called when we are exiting the scope of some AST node for 351 /// which `push_demangle_node` was called. pop_demangle_node(&mut self)352 fn pop_demangle_node(&mut self) {} 353 } 354 355 impl<W: fmt::Write> DemangleWrite for W { write_string(&mut self, s: &str) -> fmt::Result356 fn write_string(&mut self, s: &str) -> fmt::Result { 357 fmt::Write::write_str(self, s) 358 } 359 } 360 361 impl<'a, T> Symbol<&'a T> 362 where 363 T: AsRef<[u8]> + ?Sized, 364 { 365 /// Parse a mangled symbol from input and return it and the trailing tail of 366 /// bytes that come after the symbol, with the default options. 367 /// 368 /// While `Symbol::new` will return an error if there is unexpected trailing 369 /// bytes, `with_tail` simply returns the trailing bytes along with the 370 /// parsed symbol. 371 /// 372 /// ``` 373 /// use cpp_demangle::BorrowedSymbol; 374 /// use std::string::ToString; 375 /// 376 /// let mangled = b"_ZN5space3fooEibc and some trailing junk"; 377 /// 378 /// let (sym, tail) = BorrowedSymbol::with_tail(&mangled[..]) 379 /// .expect("Could not parse mangled symbol!"); 380 /// 381 /// assert_eq!(tail, b" and some trailing junk"); 382 /// 383 /// let demangled = sym.to_string(); 384 /// assert_eq!(demangled, "space::foo(int, bool, char)"); 385 /// ``` 386 #[inline] with_tail(input: &'a T) -> Result<(BorrowedSymbol<'a>, &'a [u8])>387 pub fn with_tail(input: &'a T) -> Result<(BorrowedSymbol<'a>, &'a [u8])> { 388 Self::with_tail_and_options(input, &Default::default()) 389 } 390 391 /// Parse a mangled symbol from input and return it and the trailing tail of 392 /// bytes that come after the symbol. 393 /// 394 /// While `Symbol::new_with_options` will return an error if there is 395 /// unexpected trailing bytes, `with_tail_and_options` simply returns the 396 /// trailing bytes along with the parsed symbol. 397 /// 398 /// ``` 399 /// use cpp_demangle::{BorrowedSymbol, ParseOptions}; 400 /// use std::string::ToString; 401 /// 402 /// let mangled = b"_ZN5space3fooEibc and some trailing junk"; 403 /// 404 /// let parse_options = ParseOptions::default() 405 /// .recursion_limit(1024); 406 /// 407 /// let (sym, tail) = BorrowedSymbol::with_tail_and_options(&mangled[..], &parse_options) 408 /// .expect("Could not parse mangled symbol!"); 409 /// 410 /// assert_eq!(tail, b" and some trailing junk"); 411 /// 412 /// let demangled = sym.to_string(); 413 /// assert_eq!(demangled, "space::foo(int, bool, char)"); 414 /// ``` with_tail_and_options( input: &'a T, options: &ParseOptions, ) -> Result<(BorrowedSymbol<'a>, &'a [u8])>415 pub fn with_tail_and_options( 416 input: &'a T, 417 options: &ParseOptions, 418 ) -> Result<(BorrowedSymbol<'a>, &'a [u8])> { 419 let mut substitutions = subs::SubstitutionTable::new(); 420 421 let ctx = ParseContext::new(*options); 422 let idx_str = IndexStr::new(input.as_ref()); 423 let (parsed, tail) = ast::MangledName::parse(&ctx, &mut substitutions, idx_str)?; 424 debug_assert!(ctx.recursion_level() == 0); 425 426 let symbol = Symbol { 427 raw: input.as_ref(), 428 substitutions: substitutions, 429 parsed: parsed, 430 }; 431 432 log!( 433 "Successfully parsed '{}' as 434 435 AST = {:#?} 436 437 substitutions = {:#?}", 438 String::from_utf8_lossy(symbol.raw), 439 symbol.parsed, 440 symbol.substitutions 441 ); 442 443 Ok((symbol, tail.into())) 444 } 445 } 446 447 impl<T> fmt::Display for Symbol<T> 448 where 449 T: AsRef<[u8]>, 450 { fmt(&self, f: &mut fmt::Formatter) -> fmt::Result451 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 452 let mut out = String::new(); 453 { 454 let options = DemangleOptions::default(); 455 let mut ctx = ast::DemangleContext::new( 456 &self.substitutions, 457 self.raw.as_ref(), 458 options, 459 &mut out, 460 ); 461 self.parsed.demangle(&mut ctx, None).map_err(|err| { 462 log!("Demangling error: {:#?}", err); 463 fmt::Error 464 })?; 465 } 466 write!(f, "{}", &out) 467 } 468 } 469