cpp_demangle/
lib.rs

1//! This crate can parse a C++ “mangled” linker symbol name into a Rust value
2//! describing what the name refers to: a variable, a function, a virtual table,
3//! etc. The description type implements `Display`, producing human-readable
4//! text describing the mangled name. Debuggers and profilers can use this crate
5//! to provide more meaningful output.
6//!
7//! C++ requires the compiler to choose names for linker symbols consistently
8//! across compilation units, so that two compilation units that have seen the
9//! same declarations can pair up definitions in one unit with references in
10//! another.  Almost all platforms other than Microsoft Windows follow the
11//! [Itanium C++ ABI][itanium]'s rules for this.
12//!
13//! [itanium]: https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling
14//!
15//! For example, suppose a C++ compilation unit has the definition:
16//!
17//! ```c++
18//! namespace space {
19//!   int foo(int x, int y) { return x+y; }
20//! }
21//! ```
22//!
23//! The Itanium C++ ABI specifies that the linker symbol for that function must
24//! be named `_ZN5space3fooEii`. This crate can parse that name into a Rust
25//! value representing its structure. Formatting the value with the `format!`
26//! macro or the `std::string::ToString::to_string` trait method yields the
27//! string `space::foo(int, int)`, which is more meaningful to the C++
28//! developer.
29
30#![deny(missing_docs)]
31#![deny(missing_debug_implementations)]
32#![deny(unsafe_code)]
33// Clippy stuff.
34#![allow(unknown_lints)]
35#![allow(clippy::inline_always)]
36#![allow(clippy::redundant_field_names)]
37#![cfg_attr(all(not(feature = "std"), feature = "alloc"), no_std)]
38#![cfg_attr(all(not(feature = "std"), feature = "alloc"), feature(alloc))]
39
40#[macro_use]
41extern crate cfg_if;
42
43cfg_if! {
44    if #[cfg(all(not(feature = "std"), feature = "alloc"))] {
45        extern crate core as std;
46        #[macro_use]
47        extern crate alloc;
48        mod imports {
49            pub use alloc::boxed;
50            pub use alloc::vec;
51            pub use alloc::string;
52            pub use alloc::borrow;
53            pub use alloc::collections::btree_map;
54        }
55    } else {
56        mod imports {
57            pub use std::boxed;
58            pub use std::vec;
59            pub use std::string;
60            pub use std::borrow;
61            pub use std::collections::btree_map;
62        }
63    }
64}
65
66use imports::*;
67
68use string::String;
69use vec::Vec;
70
71#[macro_use]
72mod logging;
73
74pub mod ast;
75pub mod error;
76mod index_str;
77mod subs;
78
79use ast::{Demangle, Parse, ParseContext};
80use error::{Error, Result};
81use index_str::IndexStr;
82use std::fmt;
83use std::num::NonZeroU32;
84
/// Settings that tune how a mangled name is parsed.
#[derive(Clone, Copy, Debug, Default)]
#[repr(C)]
pub struct ParseOptions {
    // Recursion cap chosen by the caller; stays `None` until
    // `recursion_limit` is called.
    recursion_limit: Option<NonZeroU32>,
}

impl ParseOptions {
    /// Cap how deeply the parser may recurse.
    ///
    /// Choosing a small cap makes the parser reject some valid symbols, while
    /// a large cap may let a pathological symbol overflow the stack during
    /// parsing. The default value is 96, which will not overflow the stack
    /// even in a debug build.
    ///
    /// # Panics
    ///
    /// Panics if `limit` is zero.
    pub fn recursion_limit(mut self, limit: u32) -> Self {
        let depth = NonZeroU32::new(limit).expect("Recursion limit must be > 0");
        self.recursion_limit = Some(depth);
        self
    }
}
103
/// Settings that tune how a parsed symbol is rendered as text.
#[derive(Clone, Copy, Debug, Default)]
#[repr(C)]
pub struct DemangleOptions {
    // Each flag starts out `false` and is switched on by the builder method
    // of the same name.
    no_params: bool,
    no_return_type: bool,
    hide_expression_literal_types: bool,
    // Recursion cap chosen by the caller; stays `None` until
    // `recursion_limit` is called.
    recursion_limit: Option<NonZeroU32>,
}

impl DemangleOptions {
    /// Create a `DemangleOptions` holding the default value of every setting.
    pub fn new() -> Self {
        Self::default()
    }

    /// Omit function arguments from the output.
    pub fn no_params(self) -> Self {
        DemangleOptions {
            no_params: true,
            ..self
        }
    }

    /// Omit the function return type from the output.
    pub fn no_return_type(self) -> Self {
        DemangleOptions {
            no_return_type: true,
            ..self
        }
    }

    /// Omit type annotations in template value parameters.
    ///
    /// The annotations are not needed to tell template instances apart, so
    /// dropping them can make it easier to match template instance names
    /// supplied by a user.
    pub fn hide_expression_literal_types(self) -> Self {
        DemangleOptions {
            hide_expression_literal_types: true,
            ..self
        }
    }

    /// Cap how deeply the demangler may recurse.
    ///
    /// Choosing a small cap makes the demangler reject some valid symbols,
    /// while a large cap may let a pathological symbol overflow the stack
    /// during demangling. The default value is 128.
    ///
    /// # Panics
    ///
    /// Panics if `limit` is zero.
    pub fn recursion_limit(self, limit: u32) -> Self {
        let depth = NonZeroU32::new(limit).expect("Recursion limit must be > 0");
        DemangleOptions {
            recursion_limit: Some(depth),
            ..self
        }
    }
}
150
/// A `Symbol` which owns the underlying storage for the mangled name.
///
/// The storage is a `Vec<u8>` holding the mangled bytes.
pub type OwnedSymbol = Symbol<Vec<u8>>;

/// A `Symbol` which borrows the underlying storage for the mangled name.
///
/// The storage is a `&'a [u8]` slice of the mangled bytes.
pub type BorrowedSymbol<'a> = Symbol<&'a [u8]>;

/// A mangled symbol that has been parsed into an AST.
///
/// This is generic over some storage type `T` which can be either owned or
/// borrowed. See the `OwnedSymbol` and `BorrowedSymbol` type aliases.
#[derive(Clone, Debug, PartialEq)]
pub struct Symbol<T> {
    // The storage handed to the constructor, kept as given.
    raw: T,
    // Substitution table filled in while `raw` was parsed; it is passed to
    // the demangling context again when the symbol is rendered.
    substitutions: subs::SubstitutionTable,
    // Root AST node produced by parsing `raw`.
    parsed: ast::MangledName,
}
167
168impl<T> Symbol<T>
169where
170    T: AsRef<[u8]>,
171{
172    /// Given some raw storage, parse the mangled symbol from it with the default
173    /// options.
174    ///
175    /// ```
176    /// use cpp_demangle::Symbol;
177    /// use std::string::ToString;
178    ///
179    /// // First, something easy :)
180    ///
181    /// let mangled = b"_ZN5space3fooEibc";
182    ///
183    /// let sym = Symbol::new(&mangled[..])
184    ///     .expect("Could not parse mangled symbol!");
185    ///
186    /// let demangled = sym.to_string();
187    /// assert_eq!(demangled, "space::foo(int, bool, char)");
188    ///
189    /// // Now let's try something a little more complicated!
190    ///
191    /// let mangled =
192    ///     b"__Z28JS_GetPropertyDescriptorByIdP9JSContextN2JS6HandleIP8JSObjectEENS2_I4jsidEENS1_13MutableHandleINS1_18PropertyDescriptorEEE";
193    ///
194    /// let sym = Symbol::new(&mangled[..])
195    ///     .expect("Could not parse mangled symbol!");
196    ///
197    /// let demangled = sym.to_string();
198    /// assert_eq!(
199    ///     demangled,
200    ///     "JS_GetPropertyDescriptorById(JSContext*, JS::Handle<JSObject*>, JS::Handle<jsid>, JS::MutableHandle<JS::PropertyDescriptor>)"
201    /// );
202    /// ```
203    #[inline]
204    pub fn new(raw: T) -> Result<Symbol<T>> {
205        Self::new_with_options(raw, &Default::default())
206    }
207
208    /// Given some raw storage, parse the mangled symbol from it.
209    ///
210    /// ```
211    /// use cpp_demangle::{ParseOptions, Symbol};
212    /// use std::string::ToString;
213    ///
214    /// // First, something easy :)
215    ///
216    /// let mangled = b"_ZN5space3fooEibc";
217    ///
218    /// let parse_options = ParseOptions::default()
219    ///     .recursion_limit(1024);
220    ///
221    /// let sym = Symbol::new_with_options(&mangled[..], &parse_options)
222    ///     .expect("Could not parse mangled symbol!");
223    ///
224    /// let demangled = sym.to_string();
225    /// assert_eq!(demangled, "space::foo(int, bool, char)");
226    ///
227    /// // Now let's try something a little more complicated!
228    ///
229    /// let mangled =
230    ///     b"__Z28JS_GetPropertyDescriptorByIdP9JSContextN2JS6HandleIP8JSObjectEENS2_I4jsidEENS1_13MutableHandleINS1_18PropertyDescriptorEEE";
231    ///
232    /// let sym = Symbol::new(&mangled[..])
233    ///     .expect("Could not parse mangled symbol!");
234    ///
235    /// let demangled = sym.to_string();
236    /// assert_eq!(
237    ///     demangled,
238    ///     "JS_GetPropertyDescriptorById(JSContext*, JS::Handle<JSObject*>, JS::Handle<jsid>, JS::MutableHandle<JS::PropertyDescriptor>)"
239    /// );
240    /// ```
241    pub fn new_with_options(raw: T, options: &ParseOptions) -> Result<Symbol<T>> {
242        let mut substitutions = subs::SubstitutionTable::new();
243
244        let parsed = {
245            let ctx = ParseContext::new(*options);
246            let input = IndexStr::new(raw.as_ref());
247
248            let (parsed, tail) = ast::MangledName::parse(&ctx, &mut substitutions, input)?;
249            debug_assert!(ctx.recursion_level() == 0);
250
251            if tail.is_empty() {
252                parsed
253            } else {
254                return Err(Error::UnexpectedText);
255            }
256        };
257
258        let symbol = Symbol {
259            raw: raw,
260            substitutions: substitutions,
261            parsed: parsed,
262        };
263
264        log!(
265            "Successfully parsed '{}' as
266
267AST = {:#?}
268
269substitutions = {:#?}",
270            String::from_utf8_lossy(symbol.raw.as_ref()),
271            symbol.parsed,
272            symbol.substitutions
273        );
274
275        Ok(symbol)
276    }
277
278    /// Demangle the symbol and return it as a String.
279    ///
280    /// Unlike the `ToString` implementation, this function allows options to
281    /// be specified.
282    ///
283    /// ```
284    /// use cpp_demangle::{DemangleOptions, Symbol};
285    /// use std::string::ToString;
286    ///
287    /// let mangled = b"_ZN5space3fooEibc";
288    ///
289    /// let sym = Symbol::new(&mangled[..])
290    ///     .expect("Could not parse mangled symbol!");
291    ///
292    /// let demangled = sym.to_string();
293    /// let options = DemangleOptions::default();
294    /// let demangled_again = sym.demangle(&options).unwrap();
295    /// assert_eq!(demangled_again, demangled);
296    /// ```
297    #[allow(clippy::trivially_copy_pass_by_ref)]
298    pub fn demangle(&self, options: &DemangleOptions) -> ::std::result::Result<String, fmt::Error> {
299        let mut out = String::new();
300        {
301            let mut ctx = ast::DemangleContext::new(
302                &self.substitutions,
303                self.raw.as_ref(),
304                *options,
305                &mut out,
306            );
307            self.parsed.demangle(&mut ctx, None)?;
308        }
309
310        Ok(out)
311    }
312
313    /// Demangle the symbol to a DemangleWrite, which lets the consumer be informed about
314    /// syntactic structure.
315    #[allow(clippy::trivially_copy_pass_by_ref)]
316    pub fn structured_demangle<W: DemangleWrite>(
317        &self,
318        out: &mut W,
319        options: &DemangleOptions,
320    ) -> fmt::Result {
321        let mut ctx =
322            ast::DemangleContext::new(&self.substitutions, self.raw.as_ref(), *options, out);
323        self.parsed.demangle(&mut ctx, None)
324    }
325}
326
/// The kind of AST node being demangled.
///
/// Only some node kinds are represented here, not all of them.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub enum DemangleNodeType {
    /// Entering a `<prefix>` production.
    Prefix,
    /// Entering a `<template-prefix>` production.
    TemplatePrefix,
    /// Entering a `<template-args>` production.
    TemplateArgs,
    /// Entering an `<unqualified-name>` production.
    UnqualifiedName,
    /// Entering a `<template-param>` production.
    TemplateParam,
    /// Entering a `<decltype>` production.
    Decltype,
    /// Entering a `<data-member-prefix>` production.
    DataMemberPrefix,
    /// Entering a `<nested-name>` production.
    NestedName,
    /// Entering a `<special-name>` production that is a vtable.
    VirtualTable,
    /// More variants may be added in the future. Match with a `_` pattern to
    /// stay compatible.
    __NonExhaustive,
}

/// A sink for demangled text that is also notified about syntactic structure.
pub trait DemangleWrite {
    /// Called on entering the scope of some AST node. Does nothing by default.
    fn push_demangle_node(&mut self, _: DemangleNodeType) {}
    /// Receives a piece of demangled text; same contract as
    /// `fmt::Write::write_str`.
    fn write_string(&mut self, s: &str) -> fmt::Result;
    /// Called on leaving the scope of an AST node for which
    /// `push_demangle_node` was called. Does nothing by default.
    fn pop_demangle_node(&mut self) {}
}

// Every `fmt::Write` is a `DemangleWrite` that only receives the text and
// keeps the default no-op push/pop notifications.
impl<W: fmt::Write> DemangleWrite for W {
    fn write_string(&mut self, s: &str) -> fmt::Result {
        <W as fmt::Write>::write_str(self, s)
    }
}
370
371impl<'a, T> Symbol<&'a T>
372where
373    T: AsRef<[u8]> + ?Sized,
374{
375    /// Parse a mangled symbol from input and return it and the trailing tail of
376    /// bytes that come after the symbol, with the default options.
377    ///
378    /// While `Symbol::new` will return an error if there is unexpected trailing
379    /// bytes, `with_tail` simply returns the trailing bytes along with the
380    /// parsed symbol.
381    ///
382    /// ```
383    /// use cpp_demangle::BorrowedSymbol;
384    /// use std::string::ToString;
385    ///
386    /// let mangled = b"_ZN5space3fooEibc and some trailing junk";
387    ///
388    /// let (sym, tail) = BorrowedSymbol::with_tail(&mangled[..])
389    ///     .expect("Could not parse mangled symbol!");
390    ///
391    /// assert_eq!(tail, b" and some trailing junk");
392    ///
393    /// let demangled = sym.to_string();
394    /// assert_eq!(demangled, "space::foo(int, bool, char)");
395    /// ```
396    #[inline]
397    pub fn with_tail(input: &'a T) -> Result<(BorrowedSymbol<'a>, &'a [u8])> {
398        Self::with_tail_and_options(input, &Default::default())
399    }
400
401    /// Parse a mangled symbol from input and return it and the trailing tail of
402    /// bytes that come after the symbol.
403    ///
404    /// While `Symbol::new_with_options` will return an error if there is
405    /// unexpected trailing bytes, `with_tail_and_options` simply returns the
406    /// trailing bytes along with the parsed symbol.
407    ///
408    /// ```
409    /// use cpp_demangle::{BorrowedSymbol, ParseOptions};
410    /// use std::string::ToString;
411    ///
412    /// let mangled = b"_ZN5space3fooEibc and some trailing junk";
413    ///
414    /// let parse_options = ParseOptions::default()
415    ///     .recursion_limit(1024);
416    ///
417    /// let (sym, tail) = BorrowedSymbol::with_tail_and_options(&mangled[..], &parse_options)
418    ///     .expect("Could not parse mangled symbol!");
419    ///
420    /// assert_eq!(tail, b" and some trailing junk");
421    ///
422    /// let demangled = sym.to_string();
423    /// assert_eq!(demangled, "space::foo(int, bool, char)");
424    /// ```
425    pub fn with_tail_and_options(
426        input: &'a T,
427        options: &ParseOptions,
428    ) -> Result<(BorrowedSymbol<'a>, &'a [u8])> {
429        let mut substitutions = subs::SubstitutionTable::new();
430
431        let ctx = ParseContext::new(*options);
432        let idx_str = IndexStr::new(input.as_ref());
433        let (parsed, tail) = ast::MangledName::parse(&ctx, &mut substitutions, idx_str)?;
434        debug_assert!(ctx.recursion_level() == 0);
435
436        let symbol = Symbol {
437            raw: input.as_ref(),
438            substitutions: substitutions,
439            parsed: parsed,
440        };
441
442        log!(
443            "Successfully parsed '{}' as
444
445AST = {:#?}
446
447substitutions = {:#?}",
448            String::from_utf8_lossy(symbol.raw),
449            symbol.parsed,
450            symbol.substitutions
451        );
452
453        Ok((symbol, tail.into()))
454    }
455}
456
457impl<T> fmt::Display for Symbol<T>
458where
459    T: AsRef<[u8]>,
460{
461    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
462        let mut out = String::new();
463        {
464            let options = DemangleOptions::default();
465            let mut ctx = ast::DemangleContext::new(
466                &self.substitutions,
467                self.raw.as_ref(),
468                options,
469                &mut out,
470            );
471            self.parsed.demangle(&mut ctx, None).map_err(|err| {
472                log!("Demangling error: {:#?}", err);
473                fmt::Error
474            })?;
475        }
476        write!(f, "{}", &out)
477    }
478}