wasmtime_internal_unwinder/
exception_table.rs

//! Compact representation of exception handlers associated with
//! callsites, for use when searching a Cranelift stack for a handler.
//!
//! This module implements (i) conversion from the metadata provided
//! alongside Cranelift's compilation result (as provided by
//! [`cranelift_codegen::MachBufferFinalized::call_sites`]) to this
//! module's compact format, and (ii) use of that format to find a
//! handler efficiently.
//!
//! The format has been designed so that it can be mapped in from disk
//! and used without post-processing; this enables efficient
//! module-loading in runtimes such as Wasmtime.
12
13use object::{Bytes, LittleEndian, U32Bytes};
14
15#[cfg(feature = "cranelift")]
16use alloc::{vec, vec::Vec};
17#[cfg(feature = "cranelift")]
18use cranelift_codegen::{
19    ExceptionContextLoc, FinalizedMachCallSite, FinalizedMachExceptionHandler, binemit::CodeOffset,
20};
21
22/// Collector struct for exception handlers per call site.
23///
24/// # Format
25///
26/// We keep five different arrays (`Vec`s) that we build as we visit
27/// callsites, in ascending offset (address relative to beginning of
28/// code segment) order: callsite offsets, tag/destination ranges,
29/// tags, tag context SP offset, destination offsets.
30///
31/// The callsite offsets and tag/destination ranges logically form a
32/// sorted lookup array, allowing us to find information for any
33/// single callsite. The range denotes a range of indices in the
34/// tag/context and destination offset arrays. Ranges are stored with
35/// the (exclusive) *end* index only; the start index is implicit as
36/// the previous end, or zero if first element.
37///
38/// The slices of tag, context, and handlers arrays named by `ranges`
39/// for each callsite specify a series of handler items for that
40/// callsite. The tag and context together allow a
41/// dynamic-tag-instance match in the unwinder: the context specifies
42/// an offset from SP at the callsite that contains a machine word
43/// (e.g. with vmctx) that, together with the static tag index, can be
44/// used to perform a dynamic match. A context of `-1` indicates no
45/// dynamic context, and a tag of `-1` indicates a catch-all
46/// handler. If a handler item matches, control should be transferred
47/// to the code offset given in the last array, `handlers`.
48///
49/// # Example
50///
51/// An example of this data format:
52///
53/// ```plain
54/// callsites: [0x10, 0x50, 0xf0] // callsites (return addrs) at offsets 0x10, 0x50, 0xf0
55/// ranges: [2, 4, 5]             // corresponding ranges for each callsite
56/// tags: [1, 5, 1, -1, -1]       // tags for each handler at each callsite
57/// contexts: [-1, -1, 0x10, 0x20, 0x30] // SP-offset for context for each tag
58/// handlers: [0x40, 0x42, 0x6f, 0x71, 0xf5] // handler destinations at each callsite
59/// ```
60///
61/// Expanding this out:
62///
63/// ```plain
64/// callsites: [0x10, 0x50, 0xf0],  # PCs relative to some start of return-points.
65/// ranges: [
66///     2,  # callsite 0x10 has tags/handlers indices 0..2
67///     4,  # callsite 0x50 has tags/handlers indices 2..4
68///     5,  # callsite 0xf0 has tags/handlers indices 4..5
69/// ],
70/// tags: [
71///     # tags for callsite 0x10:
72///     1,
73///     5,
74///     # tags for callsite 0x50:
75///     1,
76///     -1,  # "catch-all"
77///     # tags for callsite 0xf0:
78///     -1,  # "catch-all"
79/// ]
80/// contexts: [
81///     # SP-offsets for context for each tag at callsite 0x10:
82///     -1,
83///     -1,
84///     # for callsite 0x50:
85///     0x10,
86///     0x20,
87///     # for callsite 0xf0:
88///     0x30,
89/// ]
90/// handlers: [
91///     # handlers for callsite 0x10:
92///     0x40,  # relative PC to handle tag 1 (above)
93///     0x42,  # relative PC to handle tag 5
94///     # handlers for callsite 0x50:
95///     0x6f,  # relative PC to handle tag 1
96///     0x71,  # relative PC to handle all other tags
97///     # handlers for callsite 0xf0:
98///     0xf5,  # relative PC to handle all other tags
99/// ]
100/// ```
#[cfg(feature = "cranelift")]
#[derive(Clone, Debug, Default)]
pub struct ExceptionTableBuilder {
    /// Section-relative return addresses of the recorded callsites,
    /// in the order in which they were added. Callsites without any
    /// handler are not recorded.
    pub callsites: Vec<U32Bytes<LittleEndian>>,
    /// For each entry of `callsites`, the exclusive end index of that
    /// callsite's items in `tags` / `contexts` / `handlers`.
    pub ranges: Vec<U32Bytes<LittleEndian>>,
    /// Tag of each handler item; `u32::MAX` marks a catch-all
    /// handler.
    pub tags: Vec<U32Bytes<LittleEndian>>,
    /// SP offset of the dynamic context for each handler item;
    /// `u32::MAX` when the callsite supplied no context before the
    /// handler.
    pub contexts: Vec<U32Bytes<LittleEndian>>,
    /// Section-relative destination offset of each handler item.
    pub handlers: Vec<U32Bytes<LittleEndian>>,
    /// Start offset of the most recently added function, used to
    /// check that functions arrive in ascending offset order.
    last_start_offset: CodeOffset,
}
111
#[cfg(feature = "cranelift")]
impl ExceptionTableBuilder {
    /// Add a function at a given offset from the start of the
    /// compiled code section, recording information about its call
    /// sites.
    ///
    /// Functions must be added in ascending offset order.
    ///
    /// Handler items are recorded in the order in which each callsite
    /// lists them. A `Context` item is not recorded as a handler of
    /// its own; instead it sets the SP offset stored alongside every
    /// handler that follows it within the same callsite. Callsites
    /// that end up with no handler items are omitted.
    ///
    /// # Panics
    ///
    /// Panics if `start_offset` is smaller than that of a previously
    /// added function, if a section-relative offset or a handler
    /// index does not fit in a `u32`, or if a callsite carries a
    /// register-based (`GPR`) context.
    pub fn add_func<'a>(
        &mut self,
        start_offset: CodeOffset,
        call_sites: impl Iterator<Item = FinalizedMachCallSite<'a>>,
    ) -> anyhow::Result<()> {
        // Ensure that we see functions in offset order.
        assert!(start_offset >= self.last_start_offset);
        self.last_start_offset = start_offset;

        // Visit each callsite in turn, translating offsets from
        // function-local to section-local.
        for call_site in call_sites {
            let ret_addr = call_site.ret_addr.checked_add(start_offset).unwrap();

            let start_idx = u32::try_from(self.tags.len()).unwrap();
            // SP offset of the dynamic context applying to the
            // handlers that follow; `u32::MAX` (`-1`) means "none".
            let mut context = u32::MAX;
            for handler in call_site.exception_handlers {
                let (tag, offset) = match handler {
                    FinalizedMachExceptionHandler::Tag(tag, offset) => (tag.as_u32(), *offset),
                    // Catch-all handlers are encoded with the
                    // reserved tag `u32::MAX` (`-1`).
                    FinalizedMachExceptionHandler::Default(offset) => (u32::MAX, *offset),
                    FinalizedMachExceptionHandler::Context(ExceptionContextLoc::SPOffset(
                        offset,
                    )) => {
                        context = *offset;
                        continue;
                    }
                    FinalizedMachExceptionHandler::Context(ExceptionContextLoc::GPR(_)) => {
                        panic!(
                            "Wasmtime exception unwind info only supports dynamic contexts on the stack"
                        );
                    }
                };

                let dest = offset.checked_add(start_offset).unwrap();
                self.tags.push(U32Bytes::new(LittleEndian, tag));
                self.contexts.push(U32Bytes::new(LittleEndian, context));
                self.handlers.push(U32Bytes::new(LittleEndian, dest));
            }
            let end_idx = u32::try_from(self.tags.len()).unwrap();

            // Omit empty callsites for compactness.
            if end_idx > start_idx {
                self.ranges.push(U32Bytes::new(LittleEndian, end_idx));
                self.callsites.push(U32Bytes::new(LittleEndian, ret_addr));
            }
        }

        Ok(())
    }

    /// Serialize the exception-handler data section, taking a closure
    /// to consume slices.
    ///
    /// The closure is invoked once per piece, in this order: the
    /// callsite count and the handler count (each a little-endian
    /// `u32`), followed by the raw bytes of `callsites`, `ranges`,
    /// `tags`, `contexts`, and `handlers`.
    ///
    /// # Panics
    ///
    /// Panics if the number of callsites or handlers does not fit in
    /// a `u32`.
    pub fn serialize<F: FnMut(&[u8])>(&self, mut f: F) {
        // Serialize the length of `callsites` / `ranges`.
        let callsite_count = u32::try_from(self.callsites.len()).unwrap();
        f(&callsite_count.to_le_bytes());
        // Serialize the length of `tags` / `contexts` / `handlers`.
        let handler_count = u32::try_from(self.handlers.len()).unwrap();
        f(&handler_count.to_le_bytes());

        // Serialize `callsites`, `ranges`, `tags`, `contexts`, and
        // `handlers` in that order.
        f(object::bytes_of_slice(&self.callsites));
        f(object::bytes_of_slice(&self.ranges));
        f(object::bytes_of_slice(&self.tags));
        f(object::bytes_of_slice(&self.contexts));
        f(object::bytes_of_slice(&self.handlers));
    }

    /// Serialize the exception-handler data section to a vector of
    /// bytes.
    pub fn to_vec(&self) -> Vec<u8> {
        let mut bytes = vec![];
        self.serialize(|slice| bytes.extend_from_slice(slice));
        bytes
    }
}
206
207/// ExceptionTable deserialized from a serialized slice.
208///
209/// This struct retains borrows of the various serialized parts of the
210/// exception table data as produced by
211/// [`ExceptionTableBuilder::serialize`].
212#[derive(Clone, Debug)]
213pub struct ExceptionTable<'a> {
214    callsites: &'a [U32Bytes<LittleEndian>],
215    ranges: &'a [U32Bytes<LittleEndian>],
216    tags: &'a [U32Bytes<LittleEndian>],
217    #[expect(
218        dead_code,
219        reason = "Will be used in subsequent PR for Wasm exception handling"
220    )]
221    contexts: &'a [U32Bytes<LittleEndian>],
222    handlers: &'a [U32Bytes<LittleEndian>],
223}
224
225impl<'a> ExceptionTable<'a> {
226    /// Parse exception tables from a byte-slice as produced by
227    /// [`ExceptionTableBuilder::serialize`].
228    pub fn parse(data: &'a [u8]) -> anyhow::Result<ExceptionTable<'a>> {
229        let mut data = Bytes(data);
230        let callsite_count = data
231            .read::<U32Bytes<LittleEndian>>()
232            .map_err(|_| anyhow::anyhow!("Unable to read callsite count prefix"))?;
233        let callsite_count = usize::try_from(callsite_count.get(LittleEndian))?;
234        let handler_count = data
235            .read::<U32Bytes<LittleEndian>>()
236            .map_err(|_| anyhow::anyhow!("Unable to read handler count prefix"))?;
237        let handler_count = usize::try_from(handler_count.get(LittleEndian))?;
238        let (callsites, data) =
239            object::slice_from_bytes::<U32Bytes<LittleEndian>>(data.0, callsite_count)
240                .map_err(|_| anyhow::anyhow!("Unable to read callsites slice"))?;
241        let (ranges, data) =
242            object::slice_from_bytes::<U32Bytes<LittleEndian>>(data, callsite_count)
243                .map_err(|_| anyhow::anyhow!("Unable to read ranges slice"))?;
244        let (tags, data) = object::slice_from_bytes::<U32Bytes<LittleEndian>>(data, handler_count)
245            .map_err(|_| anyhow::anyhow!("Unable to read tags slice"))?;
246        let (contexts, data) =
247            object::slice_from_bytes::<U32Bytes<LittleEndian>>(data, handler_count)
248                .map_err(|_| anyhow::anyhow!("Unable to read contexts slice"))?;
249        let (handlers, data) =
250            object::slice_from_bytes::<U32Bytes<LittleEndian>>(data, handler_count)
251                .map_err(|_| anyhow::anyhow!("Unable to read handlers slice"))?;
252
253        if !data.is_empty() {
254            anyhow::bail!("Unexpected data at end of serialized exception table");
255        }
256
257        Ok(ExceptionTable {
258            callsites,
259            ranges,
260            tags,
261            contexts,
262            handlers,
263        })
264    }
265
266    /// Look up the handler destination, if any, for a given return
267    /// address (as an offset into the code section) and exception
268    /// tag.
269    ///
270    /// Note: we use raw `u32` types for code offsets and tags here to
271    /// avoid dependencies on `cranelift-codegen` when this crate is
272    /// built without compiler backend support (runtime-only config).
273    pub fn lookup(&self, pc: u32, tag: u32) -> Option<u32> {
274        // First, look up the callsite in the sorted callsites list.
275        let callsite_idx = self
276            .callsites
277            .binary_search_by_key(&pc, |callsite| callsite.get(LittleEndian))
278            .ok()?;
279        // Now get the range.
280        let end_idx = self.ranges[callsite_idx].get(LittleEndian);
281        let start_idx = if callsite_idx > 0 {
282            self.ranges[callsite_idx - 1].get(LittleEndian)
283        } else {
284            0
285        };
286
287        // Take the subslices of `tags` and `handlers` corresponding
288        // to this callsite.
289        let start_idx = usize::try_from(start_idx).unwrap();
290        let end_idx = usize::try_from(end_idx).unwrap();
291        let tags = &self.tags[start_idx..end_idx];
292        let handlers = &self.handlers[start_idx..end_idx];
293
294        // Is there any handler with an exact tag match?
295        if let Ok(handler_idx) = tags.binary_search_by_key(&tag, |tag| tag.get(LittleEndian)) {
296            return Some(handlers[handler_idx].get(LittleEndian));
297        }
298
299        // If not, is there a fallback handler? Note that we serialize
300        // it with the tag `u32::MAX`, so it is always last in sorted
301        // order.
302        if tags.last().map(|v| v.get(LittleEndian)) == Some(u32::MAX) {
303            return Some(handlers.last().unwrap().get(LittleEndian));
304        }
305
306        None
307    }
308}
309
#[cfg(all(test, feature = "cranelift"))]
mod test {
    use super::*;
    use cranelift_codegen::entity::EntityRef;
    use cranelift_codegen::ir::ExceptionTag;

    /// Builds a table for one function placed at offset 0x100,
    /// round-trips it through the serialized form, and checks the
    /// handler found for several (return address, tag) pairs.
    #[test]
    fn serialize_exception_table() {
        let callsites = [
            // Two tagged handlers followed by a catch-all.
            FinalizedMachCallSite {
                ret_addr: 0x10,
                exception_handlers: &[
                    FinalizedMachExceptionHandler::Tag(ExceptionTag::new(1), 0x20),
                    FinalizedMachExceptionHandler::Tag(ExceptionTag::new(2), 0x30),
                    FinalizedMachExceptionHandler::Default(0x40),
                ],
            },
            // No handlers at all: must be dropped from the table.
            FinalizedMachCallSite {
                ret_addr: 0x48,
                exception_handlers: &[],
            },
            // Catch-all only.
            FinalizedMachCallSite {
                ret_addr: 0x50,
                exception_handlers: &[FinalizedMachExceptionHandler::Default(0x60)],
            },
        ];

        let mut table_builder = ExceptionTableBuilder::default();
        table_builder
            .add_func(0x100, callsites.into_iter())
            .unwrap();

        let mut serialized = vec![];
        table_builder.serialize(|chunk| serialized.extend_from_slice(chunk));

        let table = ExceptionTable::parse(&serialized).unwrap();

        // (return address, tag, expected handler destination)
        let expectations = [
            (0x148, 1, None),
            (0x110, 1, Some(0x120)),
            (0x110, 2, Some(0x130)),
            (0x110, 42, Some(0x140)),
            (0x150, 100, Some(0x160)),
        ];
        for (pc, tag, expected) in expectations {
            assert_eq!(table.lookup(pc, tag), expected);
        }
    }
}
350}