wasmtime_internal_unwinder/
exception_table.rs

1//! Compact representation of exception handlers associated with
2//! callsites, for use when searching a Cranelift stack for a handler.
3//!
4//! This module implements (i) conversion from the metadata provided
5//! alongside Cranelift's compilation result (as provided by
6//! [`cranelift_codegen::MachBufferFinalized::call_sites`]) to its
7//! format, and (ii) use of its format to find a handler efficiently.
8//!
9//! The format has been designed so that it can be mapped in from disk
10//! and used without post-processing; this enables efficient
11//! module-loading in runtimes such as Wasmtime.
12
13use object::{Bytes, LittleEndian, U32Bytes};
14
15#[cfg(feature = "cranelift")]
16use alloc::{vec, vec::Vec};
17#[cfg(feature = "cranelift")]
18use cranelift_codegen::{FinalizedMachCallSite, binemit::CodeOffset};
19
20/// Collector struct for exception handlers per call site.
21///
22/// # Format
23///
24/// We keep four different arrays (`Vec`s) that we build as we visit
25/// callsites, in ascending offset (address relative to beginning of
26/// code segment) order: tags, destination offsets, callsite offsets,
27/// and tag/destination ranges.
28///
29/// The callsite offsets and tag/destination ranges logically form a
30/// sorted lookup array, allowing us to find information for any
31/// single callsite. The range denotes a range of indices in the tag
32/// and destination offset arrays, and those are sorted by tag per
33/// callsite. Ranges are stored with the (exclusive) *end* index only;
34/// the start index is implicit as the previous end, or zero if first
35/// element.
36///
37/// # Example
38///
39/// An example of this data format:
40///
41/// ```plain
42/// callsites: [0x10, 0x50, 0xf0] // callsites (return addrs) at offsets 0x10, 0x50, 0xf0
43/// ranges: [2, 4, 5]             // corresponding ranges for each callsite
44/// tags: [1, 5, 1, -1, -1]       // tags for each handler at each callsite
45/// handlers: [0x40, 0x42, 0x6f, 0x71, 0xf5] // handler destinations at each callsite
46/// ```
47///
48/// Expanding this out:
49///
50/// ```plain
51/// callsites: [0x10, 0x50, 0xf0],  # PCs relative to some start of return-points.
52/// ranges: [
53///     2,  # callsite 0x10 has tags/handlers indices 0..2
54///     4,  # callsite 0x50 has tags/handlers indices 2..4
55///     5,  # callsite 0xf0 has tags/handlers indices 4..5
56/// ],
57/// tags: [
58///     # tags for callsite 0x10:
59///     1,
60///     5,
61///     # tags for callsite 0x50:
62///     1,
63///     -1,  # "catch-all"
64///     # tags for callsite 0xf0:
65///     -1,  # "catch-all"
66/// ]
67/// handlers: [
68///     # handlers for callsite 0x10:
69///     0x40,  # relative PC to handle tag 1 (above)
70///     0x42,  # relative PC to handle tag 5
71///     # handlers for callsite 0x50:
72///     0x6f,  # relative PC to handle tag 1
73///     0x71,  # relative PC to handle all other tags
74///     # handlers for callsite 0xf0:
75///     0xf5,  # relative PC to handle all other tags
76/// ]
77/// ```
78#[cfg(feature = "cranelift")]
79#[derive(Clone, Debug, Default)]
80pub struct ExceptionTableBuilder {
81    pub callsites: Vec<U32Bytes<LittleEndian>>,
82    pub ranges: Vec<U32Bytes<LittleEndian>>,
83    pub tags: Vec<U32Bytes<LittleEndian>>,
84    pub handlers: Vec<U32Bytes<LittleEndian>>,
85    last_start_offset: CodeOffset,
86}
87
88#[cfg(feature = "cranelift")]
89impl ExceptionTableBuilder {
90    /// Add a function at a given offset from the start of the
91    /// compiled code section, recording information about its call
92    /// sites.
93    ///
94    /// Functions must be added in ascending offset order.
95    pub fn add_func<'a>(
96        &mut self,
97        start_offset: CodeOffset,
98        call_sites: impl Iterator<Item = FinalizedMachCallSite<'a>>,
99    ) -> anyhow::Result<()> {
100        // Ensure that we see functions in offset order.
101        assert!(start_offset >= self.last_start_offset);
102        self.last_start_offset = start_offset;
103
104        // Visit each callsite in turn, translating offsets from
105        // function-local to section-local.
106        let mut handlers = vec![];
107        for call_site in call_sites {
108            let ret_addr = call_site.ret_addr.checked_add(start_offset).unwrap();
109            handlers.extend(call_site.exception_handlers.iter().cloned());
110            handlers.sort_by_key(|(tag, _dest)| *tag);
111
112            if handlers.windows(2).any(|parts| parts[0].0 == parts[1].0) {
113                anyhow::bail!("Duplicate handler tag");
114            }
115
116            let start_idx = u32::try_from(self.tags.len()).unwrap();
117            for (tag, dest) in handlers.drain(..) {
118                self.tags.push(U32Bytes::new(
119                    LittleEndian,
120                    tag.expand().map(|t| t.as_u32()).unwrap_or(u32::MAX),
121                ));
122                self.handlers.push(U32Bytes::new(
123                    LittleEndian,
124                    dest.checked_add(start_offset).unwrap(),
125                ));
126            }
127            let end_idx = u32::try_from(self.tags.len()).unwrap();
128
129            // Omit empty callsites for compactness.
130            if end_idx > start_idx {
131                self.ranges.push(U32Bytes::new(LittleEndian, end_idx));
132                self.callsites.push(U32Bytes::new(LittleEndian, ret_addr));
133            }
134        }
135
136        Ok(())
137    }
138
139    /// Serialize the exception-handler data section, taking a closure
140    /// to consume slices.
141    pub fn serialize<F: FnMut(&[u8])>(&self, mut f: F) {
142        // Serialize the length of `callsites` / `ranges`.
143        let callsite_count = u32::try_from(self.callsites.len()).unwrap();
144        f(&callsite_count.to_le_bytes());
145        // Serialize the length of `tags` / `handlers`.
146        let handler_count = u32::try_from(self.handlers.len()).unwrap();
147        f(&handler_count.to_le_bytes());
148
149        // Serialize `callsites`, `ranges`, `tags`, and `handlers` in
150        // that order.
151        f(object::bytes_of_slice(&self.callsites));
152        f(object::bytes_of_slice(&self.ranges));
153        f(object::bytes_of_slice(&self.tags));
154        f(object::bytes_of_slice(&self.handlers));
155    }
156
157    /// Serialize the exception-handler data section to a vector of
158    /// bytes.
159    pub fn to_vec(&self) -> Vec<u8> {
160        let mut bytes = vec![];
161        self.serialize(|slice| bytes.extend(slice.iter().cloned()));
162        bytes
163    }
164}
165
166/// ExceptionTable deserialized from a serialized slice.
167///
168/// This struct retains borrows of the various serialized parts of the
169/// exception table data as produced by
170/// [`ExceptionTableBuilder::serialize`].
171#[derive(Clone, Debug)]
172pub struct ExceptionTable<'a> {
173    callsites: &'a [U32Bytes<LittleEndian>],
174    ranges: &'a [U32Bytes<LittleEndian>],
175    tags: &'a [U32Bytes<LittleEndian>],
176    handlers: &'a [U32Bytes<LittleEndian>],
177}
178
179impl<'a> ExceptionTable<'a> {
180    /// Parse exception tables from a byte-slice as produced by
181    /// [`ExceptionTableBuilder::serialize`].
182    pub fn parse(data: &'a [u8]) -> anyhow::Result<ExceptionTable<'a>> {
183        let mut data = Bytes(data);
184        let callsite_count = data
185            .read::<U32Bytes<LittleEndian>>()
186            .map_err(|_| anyhow::anyhow!("Unable to read callsite count prefix"))?;
187        let callsite_count = usize::try_from(callsite_count.get(LittleEndian))?;
188        let handler_count = data
189            .read::<U32Bytes<LittleEndian>>()
190            .map_err(|_| anyhow::anyhow!("Unable to read handler count prefix"))?;
191        let handler_count = usize::try_from(handler_count.get(LittleEndian))?;
192        let (callsites, data) =
193            object::slice_from_bytes::<U32Bytes<LittleEndian>>(data.0, callsite_count)
194                .map_err(|_| anyhow::anyhow!("Unable to read callsites slice"))?;
195        let (ranges, data) =
196            object::slice_from_bytes::<U32Bytes<LittleEndian>>(data, callsite_count)
197                .map_err(|_| anyhow::anyhow!("Unable to read ranges slice"))?;
198        let (tags, data) = object::slice_from_bytes::<U32Bytes<LittleEndian>>(data, handler_count)
199            .map_err(|_| anyhow::anyhow!("Unable to read tags slice"))?;
200        let (handlers, data) =
201            object::slice_from_bytes::<U32Bytes<LittleEndian>>(data, handler_count)
202                .map_err(|_| anyhow::anyhow!("Unable to read handlers slice"))?;
203
204        if !data.is_empty() {
205            anyhow::bail!("Unexpected data at end of serialized exception table");
206        }
207
208        Ok(ExceptionTable {
209            callsites,
210            ranges,
211            tags,
212            handlers,
213        })
214    }
215
216    /// Look up the handler destination, if any, for a given return
217    /// address (as an offset into the code section) and exception
218    /// tag.
219    ///
220    /// Note: we use raw `u32` types for code offsets and tags here to
221    /// avoid dependencies on `cranelift-codegen` when this crate is
222    /// built without compiler backend support (runtime-only config).
223    pub fn lookup(&self, pc: u32, tag: u32) -> Option<u32> {
224        // First, look up the callsite in the sorted callsites list.
225        let callsite_idx = self
226            .callsites
227            .binary_search_by_key(&pc, |callsite| callsite.get(LittleEndian))
228            .ok()?;
229        // Now get the range.
230        let end_idx = self.ranges[callsite_idx].get(LittleEndian);
231        let start_idx = if callsite_idx > 0 {
232            self.ranges[callsite_idx - 1].get(LittleEndian)
233        } else {
234            0
235        };
236
237        // Take the subslices of `tags` and `handlers` corresponding
238        // to this callsite.
239        let start_idx = usize::try_from(start_idx).unwrap();
240        let end_idx = usize::try_from(end_idx).unwrap();
241        let tags = &self.tags[start_idx..end_idx];
242        let handlers = &self.handlers[start_idx..end_idx];
243
244        // Is there any handler with an exact tag match?
245        if let Ok(handler_idx) = tags.binary_search_by_key(&tag, |tag| tag.get(LittleEndian)) {
246            return Some(handlers[handler_idx].get(LittleEndian));
247        }
248
249        // If not, is there a fallback handler? Note that we serialize
250        // it with the tag `u32::MAX`, so it is always last in sorted
251        // order.
252        if tags.last().map(|v| v.get(LittleEndian)) == Some(u32::MAX) {
253            return Some(handlers.last().unwrap().get(LittleEndian));
254        }
255
256        None
257    }
258}
259
260#[cfg(all(test, feature = "cranelift"))]
261mod test {
262    use super::*;
263    use cranelift_codegen::entity::EntityRef;
264    use cranelift_codegen::ir::ExceptionTag;
265
266    #[test]
267    fn serialize_exception_table() {
268        let callsites = [
269            FinalizedMachCallSite {
270                ret_addr: 0x10,
271                exception_handlers: &[
272                    (Some(ExceptionTag::new(1)).into(), 0x20),
273                    (Some(ExceptionTag::new(2)).into(), 0x30),
274                    (None.into(), 0x40),
275                ],
276            },
277            FinalizedMachCallSite {
278                ret_addr: 0x48,
279                exception_handlers: &[],
280            },
281            FinalizedMachCallSite {
282                ret_addr: 0x50,
283                exception_handlers: &[(None.into(), 0x60)],
284            },
285        ];
286
287        let mut builder = ExceptionTableBuilder::default();
288        builder.add_func(0x100, callsites.into_iter()).unwrap();
289        let mut bytes = vec![];
290        builder.serialize(|slice| bytes.extend(slice.iter().cloned()));
291
292        let deserialized = ExceptionTable::parse(&bytes).unwrap();
293
294        assert_eq!(deserialized.lookup(0x148, 1), None);
295        assert_eq!(deserialized.lookup(0x110, 1), Some(0x120));
296        assert_eq!(deserialized.lookup(0x110, 2), Some(0x130));
297        assert_eq!(deserialized.lookup(0x110, 42), Some(0x140));
298        assert_eq!(deserialized.lookup(0x150, 100), Some(0x160));
299    }
300}