cpp_demangle/lib.rs
1//! This crate can parse a C++ “mangled” linker symbol name into a Rust value
2//! describing what the name refers to: a variable, a function, a virtual table,
3//! etc. The description type implements `Display`, producing human-readable
4//! text describing the mangled name. Debuggers and profilers can use this crate
5//! to provide more meaningful output.
6//!
7//! C++ requires the compiler to choose names for linker symbols consistently
8//! across compilation units, so that two compilation units that have seen the
9//! same declarations can pair up definitions in one unit with references in
10//! another. Almost all platforms other than Microsoft Windows follow the
11//! [Itanium C++ ABI][itanium]'s rules for this.
12//!
13//! [itanium]: https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling
14//!
15//! For example, suppose a C++ compilation unit has the definition:
16//!
17//! ```c++
18//! namespace space {
19//! int foo(int x, int y) { return x+y; }
20//! }
21//! ```
22//!
23//! The Itanium C++ ABI specifies that the linker symbol for that function must
24//! be named `_ZN5space3fooEii`. This crate can parse that name into a Rust
25//! value representing its structure. Formatting the value with the `format!`
26//! macro or the `std::string::ToString::to_string` trait method yields the
27//! string `space::foo(int, int)`, which is more meaningful to the C++
28//! developer.
29
30#![deny(missing_docs)]
31#![deny(missing_debug_implementations)]
32#![deny(unsafe_code)]
33// Clippy stuff.
34#![allow(unknown_lints)]
35#![allow(clippy::inline_always)]
36#![allow(clippy::redundant_field_names)]
37#![cfg_attr(all(not(feature = "std"), feature = "alloc"), no_std)]
38#![cfg_attr(all(not(feature = "std"), feature = "alloc"), feature(alloc))]
39
40#[macro_use]
41extern crate cfg_if;
42
43cfg_if! {
44 if #[cfg(all(not(feature = "std"), feature = "alloc"))] {
45 extern crate core as std;
46 #[macro_use]
47 extern crate alloc;
48 mod imports {
49 pub use alloc::boxed;
50 pub use alloc::vec;
51 pub use alloc::string;
52 pub use alloc::borrow;
53 pub use alloc::collections::btree_map;
54 }
55 } else {
56 mod imports {
57 pub use std::boxed;
58 pub use std::vec;
59 pub use std::string;
60 pub use std::borrow;
61 pub use std::collections::btree_map;
62 }
63 }
64}
65
66use imports::*;
67
68use string::String;
69use vec::Vec;
70
71#[macro_use]
72mod logging;
73
74pub mod ast;
75pub mod error;
76mod index_str;
77mod subs;
78
79use ast::{Demangle, Parse, ParseContext};
80use error::{Error, Result};
81use index_str::IndexStr;
82use std::fmt;
83use std::num::NonZeroU32;
84
/// Options to control the parsing process.
///
/// Start from `ParseOptions::default()` and refine the value with the
/// builder-style methods, each of which returns the updated options.
#[derive(Clone, Copy, Debug, Default)]
#[repr(C)]
pub struct ParseOptions {
    // `None` until `recursion_limit` is called, i.e. no explicit limit has
    // been requested by the caller.
    recursion_limit: Option<NonZeroU32>,
}

impl ParseOptions {
    /// Set the limit on recursion depth during the parsing phase. A low
    /// limit will cause valid symbols to be rejected, but a high limit may
    /// allow pathological symbols to overflow the stack during parsing.
    /// The default value is 96, which will not overflow the stack even in
    /// a debug build.
    ///
    /// # Panics
    ///
    /// Panics if `limit` is zero.
    pub fn recursion_limit(mut self, limit: u32) -> Self {
        // Validate first so that a zero limit aborts before anything is stored.
        let limit = NonZeroU32::new(limit).expect("Recursion limit must be > 0");
        self.recursion_limit = Some(limit);
        self
    }
}
103
/// Options to control the demangling process.
///
/// Every option starts out disabled or unset; the builder-style methods each
/// return an updated copy of the options.
#[derive(Clone, Copy, Debug, Default)]
#[repr(C)]
pub struct DemangleOptions {
    // Leave function arguments out of the output.
    no_params: bool,
    // Leave the function return type out of the output.
    no_return_type: bool,
    // Leave type annotations out of template value parameters.
    hide_expression_literal_types: bool,
    // `None` until `recursion_limit` is called.
    recursion_limit: Option<NonZeroU32>,
}

impl DemangleOptions {
    /// Construct a new `DemangleOptions` with the default values.
    pub fn new() -> Self {
        Self::default()
    }

    /// Do not display function arguments.
    pub fn no_params(self) -> Self {
        Self {
            no_params: true,
            ..self
        }
    }

    /// Do not display the function return type.
    pub fn no_return_type(self) -> Self {
        Self {
            no_return_type: true,
            ..self
        }
    }

    /// Hide type annotations in template value parameters.
    /// These are not needed to distinguish template instances
    /// so this can make it easier to match user-provided
    /// template instance names.
    pub fn hide_expression_literal_types(self) -> Self {
        Self {
            hide_expression_literal_types: true,
            ..self
        }
    }

    /// Set the limit on recursion depth during the demangling phase. A low
    /// limit will cause valid symbols to be rejected, but a high limit may
    /// allow pathological symbols to overflow the stack during demangling.
    /// The default value is 128.
    ///
    /// # Panics
    ///
    /// Panics if `limit` is zero.
    pub fn recursion_limit(self, limit: u32) -> Self {
        // Validate first so that a zero limit aborts before anything is stored.
        let limit = NonZeroU32::new(limit).expect("Recursion limit must be > 0");
        Self {
            recursion_limit: Some(limit),
            ..self
        }
    }
}
150
/// A `Symbol` which owns the underlying storage for the mangled name.
///
/// The mangled bytes are held in a `Vec<u8>`.
pub type OwnedSymbol = Symbol<Vec<u8>>;
153
/// A `Symbol` which borrows the underlying storage for the mangled name.
///
/// The mangled bytes are a `&'a [u8]` slice, so the symbol cannot outlive
/// the buffer it was parsed from.
pub type BorrowedSymbol<'a> = Symbol<&'a [u8]>;
156
/// A mangled symbol that has been parsed into an AST.
///
/// This is generic over some storage type `T` which can be either owned or
/// borrowed. See the `OwnedSymbol` and `BorrowedSymbol` type aliases.
#[derive(Clone, Debug, PartialEq)]
pub struct Symbol<T> {
    // The mangled name exactly as it was handed to the constructor.
    raw: T,
    // Substitution table filled in while parsing; it is handed back to the
    // demangling context whenever the symbol is demangled.
    substitutions: subs::SubstitutionTable,
    // Root of the AST produced by parsing `raw`.
    parsed: ast::MangledName,
}
167
168impl<T> Symbol<T>
169where
170 T: AsRef<[u8]>,
171{
172 /// Given some raw storage, parse the mangled symbol from it with the default
173 /// options.
174 ///
175 /// ```
176 /// use cpp_demangle::Symbol;
177 /// use std::string::ToString;
178 ///
179 /// // First, something easy :)
180 ///
181 /// let mangled = b"_ZN5space3fooEibc";
182 ///
183 /// let sym = Symbol::new(&mangled[..])
184 /// .expect("Could not parse mangled symbol!");
185 ///
186 /// let demangled = sym.to_string();
187 /// assert_eq!(demangled, "space::foo(int, bool, char)");
188 ///
189 /// // Now let's try something a little more complicated!
190 ///
191 /// let mangled =
192 /// b"__Z28JS_GetPropertyDescriptorByIdP9JSContextN2JS6HandleIP8JSObjectEENS2_I4jsidEENS1_13MutableHandleINS1_18PropertyDescriptorEEE";
193 ///
194 /// let sym = Symbol::new(&mangled[..])
195 /// .expect("Could not parse mangled symbol!");
196 ///
197 /// let demangled = sym.to_string();
198 /// assert_eq!(
199 /// demangled,
200 /// "JS_GetPropertyDescriptorById(JSContext*, JS::Handle<JSObject*>, JS::Handle<jsid>, JS::MutableHandle<JS::PropertyDescriptor>)"
201 /// );
202 /// ```
203 #[inline]
204 pub fn new(raw: T) -> Result<Symbol<T>> {
205 Self::new_with_options(raw, &Default::default())
206 }
207
208 /// Given some raw storage, parse the mangled symbol from it.
209 ///
210 /// ```
211 /// use cpp_demangle::{ParseOptions, Symbol};
212 /// use std::string::ToString;
213 ///
214 /// // First, something easy :)
215 ///
216 /// let mangled = b"_ZN5space3fooEibc";
217 ///
218 /// let parse_options = ParseOptions::default()
219 /// .recursion_limit(1024);
220 ///
221 /// let sym = Symbol::new_with_options(&mangled[..], &parse_options)
222 /// .expect("Could not parse mangled symbol!");
223 ///
224 /// let demangled = sym.to_string();
225 /// assert_eq!(demangled, "space::foo(int, bool, char)");
226 ///
227 /// // Now let's try something a little more complicated!
228 ///
229 /// let mangled =
230 /// b"__Z28JS_GetPropertyDescriptorByIdP9JSContextN2JS6HandleIP8JSObjectEENS2_I4jsidEENS1_13MutableHandleINS1_18PropertyDescriptorEEE";
231 ///
232 /// let sym = Symbol::new(&mangled[..])
233 /// .expect("Could not parse mangled symbol!");
234 ///
235 /// let demangled = sym.to_string();
236 /// assert_eq!(
237 /// demangled,
238 /// "JS_GetPropertyDescriptorById(JSContext*, JS::Handle<JSObject*>, JS::Handle<jsid>, JS::MutableHandle<JS::PropertyDescriptor>)"
239 /// );
240 /// ```
241 pub fn new_with_options(raw: T, options: &ParseOptions) -> Result<Symbol<T>> {
242 let mut substitutions = subs::SubstitutionTable::new();
243
244 let parsed = {
245 let ctx = ParseContext::new(*options);
246 let input = IndexStr::new(raw.as_ref());
247
248 let (parsed, tail) = ast::MangledName::parse(&ctx, &mut substitutions, input)?;
249 debug_assert!(ctx.recursion_level() == 0);
250
251 if tail.is_empty() {
252 parsed
253 } else {
254 return Err(Error::UnexpectedText);
255 }
256 };
257
258 let symbol = Symbol {
259 raw: raw,
260 substitutions: substitutions,
261 parsed: parsed,
262 };
263
264 log!(
265 "Successfully parsed '{}' as
266
267AST = {:#?}
268
269substitutions = {:#?}",
270 String::from_utf8_lossy(symbol.raw.as_ref()),
271 symbol.parsed,
272 symbol.substitutions
273 );
274
275 Ok(symbol)
276 }
277
278 /// Demangle the symbol and return it as a String.
279 ///
280 /// Unlike the `ToString` implementation, this function allows options to
281 /// be specified.
282 ///
283 /// ```
284 /// use cpp_demangle::{DemangleOptions, Symbol};
285 /// use std::string::ToString;
286 ///
287 /// let mangled = b"_ZN5space3fooEibc";
288 ///
289 /// let sym = Symbol::new(&mangled[..])
290 /// .expect("Could not parse mangled symbol!");
291 ///
292 /// let demangled = sym.to_string();
293 /// let options = DemangleOptions::default();
294 /// let demangled_again = sym.demangle(&options).unwrap();
295 /// assert_eq!(demangled_again, demangled);
296 /// ```
297 #[allow(clippy::trivially_copy_pass_by_ref)]
298 pub fn demangle(&self, options: &DemangleOptions) -> ::std::result::Result<String, fmt::Error> {
299 let mut out = String::new();
300 {
301 let mut ctx = ast::DemangleContext::new(
302 &self.substitutions,
303 self.raw.as_ref(),
304 *options,
305 &mut out,
306 );
307 self.parsed.demangle(&mut ctx, None)?;
308 }
309
310 Ok(out)
311 }
312
313 /// Demangle the symbol to a DemangleWrite, which lets the consumer be informed about
314 /// syntactic structure.
315 #[allow(clippy::trivially_copy_pass_by_ref)]
316 pub fn structured_demangle<W: DemangleWrite>(
317 &self,
318 out: &mut W,
319 options: &DemangleOptions,
320 ) -> fmt::Result {
321 let mut ctx =
322 ast::DemangleContext::new(&self.substitutions, self.raw.as_ref(), *options, out);
323 self.parsed.demangle(&mut ctx, None)
324 }
325}
326
/// The type of a demangled AST node.
///
/// This list is only partial: not all nodes are represented.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub enum DemangleNodeType {
    /// Entering a `<prefix>` production
    Prefix,
    /// Entering a `<template-prefix>` production
    TemplatePrefix,
    /// Entering a `<template-args>` production
    TemplateArgs,
    /// Entering a `<unqualified-name>` production
    UnqualifiedName,
    /// Entering a `<template-param>` production
    TemplateParam,
    /// Entering a `<decltype>` production
    Decltype,
    /// Entering a `<data-member-prefix>` production
    DataMemberPrefix,
    /// Entering a `<nested-name>` production
    NestedName,
    /// Entering a `<special-name>` production that is a vtable.
    VirtualTable,
    /// Additional values may be added in the future. Use a
    /// `_` pattern for compatibility.
    __NonExhaustive,
}
353
/// Sink for demangled text that reports syntactic structure.
///
/// Only `write_string` has to be provided; the two node hooks default to
/// doing nothing. Every `fmt::Write` type already implements this trait
/// through a blanket implementation.
pub trait DemangleWrite {
    /// Called when we are entering the scope of some AST node.
    ///
    /// The default implementation does nothing.
    fn push_demangle_node(&mut self, _: DemangleNodeType) {}
    /// Same as `fmt::Write::write_str`.
    fn write_string(&mut self, s: &str) -> fmt::Result;
    /// Called when we are exiting the scope of some AST node for
    /// which `push_demangle_node` was called.
    ///
    /// The default implementation does nothing.
    fn pop_demangle_node(&mut self) {}
}
364
365impl<W: fmt::Write> DemangleWrite for W {
366 fn write_string(&mut self, s: &str) -> fmt::Result {
367 fmt::Write::write_str(self, s)
368 }
369}
370
371impl<'a, T> Symbol<&'a T>
372where
373 T: AsRef<[u8]> + ?Sized,
374{
375 /// Parse a mangled symbol from input and return it and the trailing tail of
376 /// bytes that come after the symbol, with the default options.
377 ///
378 /// While `Symbol::new` will return an error if there is unexpected trailing
379 /// bytes, `with_tail` simply returns the trailing bytes along with the
380 /// parsed symbol.
381 ///
382 /// ```
383 /// use cpp_demangle::BorrowedSymbol;
384 /// use std::string::ToString;
385 ///
386 /// let mangled = b"_ZN5space3fooEibc and some trailing junk";
387 ///
388 /// let (sym, tail) = BorrowedSymbol::with_tail(&mangled[..])
389 /// .expect("Could not parse mangled symbol!");
390 ///
391 /// assert_eq!(tail, b" and some trailing junk");
392 ///
393 /// let demangled = sym.to_string();
394 /// assert_eq!(demangled, "space::foo(int, bool, char)");
395 /// ```
396 #[inline]
397 pub fn with_tail(input: &'a T) -> Result<(BorrowedSymbol<'a>, &'a [u8])> {
398 Self::with_tail_and_options(input, &Default::default())
399 }
400
401 /// Parse a mangled symbol from input and return it and the trailing tail of
402 /// bytes that come after the symbol.
403 ///
404 /// While `Symbol::new_with_options` will return an error if there is
405 /// unexpected trailing bytes, `with_tail_and_options` simply returns the
406 /// trailing bytes along with the parsed symbol.
407 ///
408 /// ```
409 /// use cpp_demangle::{BorrowedSymbol, ParseOptions};
410 /// use std::string::ToString;
411 ///
412 /// let mangled = b"_ZN5space3fooEibc and some trailing junk";
413 ///
414 /// let parse_options = ParseOptions::default()
415 /// .recursion_limit(1024);
416 ///
417 /// let (sym, tail) = BorrowedSymbol::with_tail_and_options(&mangled[..], &parse_options)
418 /// .expect("Could not parse mangled symbol!");
419 ///
420 /// assert_eq!(tail, b" and some trailing junk");
421 ///
422 /// let demangled = sym.to_string();
423 /// assert_eq!(demangled, "space::foo(int, bool, char)");
424 /// ```
425 pub fn with_tail_and_options(
426 input: &'a T,
427 options: &ParseOptions,
428 ) -> Result<(BorrowedSymbol<'a>, &'a [u8])> {
429 let mut substitutions = subs::SubstitutionTable::new();
430
431 let ctx = ParseContext::new(*options);
432 let idx_str = IndexStr::new(input.as_ref());
433 let (parsed, tail) = ast::MangledName::parse(&ctx, &mut substitutions, idx_str)?;
434 debug_assert!(ctx.recursion_level() == 0);
435
436 let symbol = Symbol {
437 raw: input.as_ref(),
438 substitutions: substitutions,
439 parsed: parsed,
440 };
441
442 log!(
443 "Successfully parsed '{}' as
444
445AST = {:#?}
446
447substitutions = {:#?}",
448 String::from_utf8_lossy(symbol.raw),
449 symbol.parsed,
450 symbol.substitutions
451 );
452
453 Ok((symbol, tail.into()))
454 }
455}
456
457impl<T> fmt::Display for Symbol<T>
458where
459 T: AsRef<[u8]>,
460{
461 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
462 let mut out = String::new();
463 {
464 let options = DemangleOptions::default();
465 let mut ctx = ast::DemangleContext::new(
466 &self.substitutions,
467 self.raw.as_ref(),
468 options,
469 &mut out,
470 );
471 self.parsed.demangle(&mut ctx, None).map_err(|err| {
472 log!("Demangling error: {:#?}", err);
473 fmt::Error
474 })?;
475 }
476 write!(f, "{}", &out)
477 }
478}