wasmtime_internal_unwinder/exception_table.rs
1//! Compact representation of exception handlers associated with
2//! callsites, for use when searching a Cranelift stack for a handler.
3//!
//! This module implements (i) conversion from the metadata provided
//! alongside Cranelift's compilation result (as provided by
//! [`cranelift_codegen::MachBufferFinalized::call_sites`]) to this
//! module's compact format, and (ii) lookup of a handler in that format.
8//!
9//! The format has been designed so that it can be mapped in from disk
10//! and used without post-processing; this enables efficient
11//! module-loading in runtimes such as Wasmtime.
12
13use object::{Bytes, LittleEndian, U32Bytes};
14
15#[cfg(feature = "cranelift")]
16use alloc::{vec, vec::Vec};
17#[cfg(feature = "cranelift")]
18use cranelift_codegen::{
19 ExceptionContextLoc, FinalizedMachCallSite, FinalizedMachExceptionHandler, binemit::CodeOffset,
20};
21
22/// Collector struct for exception handlers per call site.
23///
24/// # Format
25///
26/// We keep five different arrays (`Vec`s) that we build as we visit
27/// callsites, in ascending offset (address relative to beginning of
28/// code segment) order: callsite offsets, tag/destination ranges,
29/// tags, tag context SP offset, destination offsets.
30///
31/// The callsite offsets and tag/destination ranges logically form a
32/// sorted lookup array, allowing us to find information for any
33/// single callsite. The range denotes a range of indices in the
34/// tag/context and destination offset arrays. Ranges are stored with
35/// the (exclusive) *end* index only; the start index is implicit as
36/// the previous end, or zero if first element.
37///
38/// The slices of tag, context, and handlers arrays named by `ranges`
39/// for each callsite specify a series of handler items for that
40/// callsite. The tag and context together allow a
41/// dynamic-tag-instance match in the unwinder: the context specifies
42/// an offset from SP at the callsite that contains a machine word
43/// (e.g. with vmctx) that, together with the static tag index, can be
44/// used to perform a dynamic match. A context of `-1` indicates no
45/// dynamic context, and a tag of `-1` indicates a catch-all
46/// handler. If a handler item matches, control should be transferred
47/// to the code offset given in the last array, `handlers`.
48///
49/// # Example
50///
51/// An example of this data format:
52///
53/// ```plain
54/// callsites: [0x10, 0x50, 0xf0] // callsites (return addrs) at offsets 0x10, 0x50, 0xf0
55/// ranges: [2, 4, 5] // corresponding ranges for each callsite
56/// tags: [1, 5, 1, -1, -1] // tags for each handler at each callsite
57/// contexts: [-1, -1, 0x10, 0x20, 0x30] // SP-offset for context for each tag
58/// handlers: [0x40, 0x42, 0x6f, 0x71, 0xf5] // handler destinations at each callsite
59/// ```
60///
61/// Expanding this out:
62///
63/// ```plain
64/// callsites: [0x10, 0x50, 0xf0], # PCs relative to some start of return-points.
65/// ranges: [
66/// 2, # callsite 0x10 has tags/handlers indices 0..2
67/// 4, # callsite 0x50 has tags/handlers indices 2..4
68/// 5, # callsite 0xf0 has tags/handlers indices 4..5
69/// ],
70/// tags: [
71/// # tags for callsite 0x10:
72/// 1,
73/// 5,
74/// # tags for callsite 0x50:
75/// 1,
76/// -1, # "catch-all"
77/// # tags for callsite 0xf0:
78/// -1, # "catch-all"
79/// ]
80/// contexts: [
81/// # SP-offsets for context for each tag at callsite 0x10:
82/// -1,
83/// -1,
84/// # for callsite 0x50:
85/// 0x10,
86/// 0x20,
87/// # for callsite 0xf0:
88/// 0x30,
89/// ]
90/// handlers: [
91/// # handlers for callsite 0x10:
92/// 0x40, # relative PC to handle tag 1 (above)
93/// 0x42, # relative PC to handle tag 5
94/// # handlers for callsite 0x50:
95/// 0x6f, # relative PC to handle tag 1
96/// 0x71, # relative PC to handle all other tags
97/// # handlers for callsite 0xf0:
98/// 0xf5, # relative PC to handle all other tags
99/// ]
100/// ```
#[cfg(feature = "cranelift")]
#[derive(Debug, Clone, Default)]
pub struct ExceptionTableBuilder {
    /// Section-relative return addresses of the callsites that have at
    /// least one handler item, in the order they were added.
    pub callsites: Vec<U32Bytes<LittleEndian>>,
    /// For each entry of `callsites`, the exclusive end index of its
    /// handler items in `tags` / `contexts` / `handlers`.
    pub ranges: Vec<U32Bytes<LittleEndian>>,
    /// Tag of each handler item; `u32::MAX` marks a catch-all handler.
    pub tags: Vec<U32Bytes<LittleEndian>>,
    /// SP offset of the dynamic context in effect for each handler
    /// item, or `u32::MAX` when there is none. Parallel to `tags`.
    pub contexts: Vec<U32Bytes<LittleEndian>>,
    /// Section-relative destination of each handler item. Parallel to
    /// `tags`.
    pub handlers: Vec<U32Bytes<LittleEndian>>,
    /// Start offset passed to the most recent `add_func` call; used to
    /// check that functions arrive in ascending offset order.
    last_start_offset: CodeOffset,
}
111
#[cfg(feature = "cranelift")]
impl ExceptionTableBuilder {
    /// Add a function at a given offset from the start of the
    /// compiled code section, recording information about its call
    /// sites.
    ///
    /// Functions must be added in ascending offset order.
    ///
    /// Handler items are recorded in the order in which each call
    /// site lists them; they are not sorted. A `Context` item does not
    /// produce a table entry itself: it sets the context recorded for
    /// the handler items that follow it within the same call site.
    /// Call sites without any handler item are omitted from the table.
    ///
    /// # Panics
    ///
    /// Panics if `start_offset` is lower than the offset of the
    /// previously added function, if a translated offset overflows
    /// `CodeOffset`, if the number of handler items does not fit in a
    /// `u32`, or if a call site names a register (`GPR`) context.
    pub fn add_func<'a>(
        &mut self,
        start_offset: CodeOffset,
        call_sites: impl Iterator<Item = FinalizedMachCallSite<'a>>,
    ) -> anyhow::Result<()> {
        // Ensure that we see functions in offset order.
        assert!(start_offset >= self.last_start_offset);
        self.last_start_offset = start_offset;

        let le = |value: u32| U32Bytes::new(LittleEndian, value);

        // Visit each callsite in turn, translating offsets from
        // function-local to section-local.
        for call_site in call_sites {
            let ret_addr = call_site.ret_addr.checked_add(start_offset).unwrap();

            let start_idx = u32::try_from(self.tags.len()).unwrap();
            // No dynamic context until a `Context` item says otherwise.
            let mut context = u32::MAX;
            for handler in call_site.exception_handlers {
                let (tag, offset) = match handler {
                    FinalizedMachExceptionHandler::Tag(tag, offset) => (tag.as_u32(), *offset),
                    // Catch-all handlers are encoded with the tag `u32::MAX`.
                    FinalizedMachExceptionHandler::Default(offset) => (u32::MAX, *offset),
                    FinalizedMachExceptionHandler::Context(ExceptionContextLoc::SPOffset(
                        offset,
                    )) => {
                        context = *offset;
                        continue;
                    }
                    FinalizedMachExceptionHandler::Context(ExceptionContextLoc::GPR(_)) => {
                        panic!(
                            "Wasmtime exception unwind info only supports dynamic contexts on the stack"
                        );
                    }
                };
                self.tags.push(le(tag));
                self.contexts.push(le(context));
                self.handlers
                    .push(le(offset.checked_add(start_offset).unwrap()));
            }
            let end_idx = u32::try_from(self.tags.len()).unwrap();

            // Omit empty callsites for compactness.
            if end_idx > start_idx {
                self.ranges.push(le(end_idx));
                self.callsites.push(le(ret_addr));
            }
        }

        Ok(())
    }

    /// Serialize the exception-handler data section, taking a closure
    /// to consume slices.
    ///
    /// The closure is called once per piece, in output order: the two
    /// little-endian `u32` length prefixes, then each array.
    pub fn serialize<F: FnMut(&[u8])>(&self, mut f: F) {
        // Serialize the length of `callsites` / `ranges`.
        let callsite_count = u32::try_from(self.callsites.len()).unwrap();
        f(&callsite_count.to_le_bytes());
        // Serialize the length of `tags` / `contexts` / `handlers`.
        let handler_count = u32::try_from(self.handlers.len()).unwrap();
        f(&handler_count.to_le_bytes());

        // Serialize `callsites`, `ranges`, `tags`, `contexts`, and
        // `handlers` in that order.
        f(object::bytes_of_slice(&self.callsites));
        f(object::bytes_of_slice(&self.ranges));
        f(object::bytes_of_slice(&self.tags));
        f(object::bytes_of_slice(&self.contexts));
        f(object::bytes_of_slice(&self.handlers));
    }

    /// Serialize the exception-handler data section to a vector of
    /// bytes.
    pub fn to_vec(&self) -> Vec<u8> {
        let mut bytes = vec![];
        self.serialize(|chunk| bytes.extend_from_slice(chunk));
        bytes
    }
}
206
207/// ExceptionTable deserialized from a serialized slice.
208///
209/// This struct retains borrows of the various serialized parts of the
210/// exception table data as produced by
211/// [`ExceptionTableBuilder::serialize`].
212#[derive(Clone, Debug)]
213pub struct ExceptionTable<'a> {
214 callsites: &'a [U32Bytes<LittleEndian>],
215 ranges: &'a [U32Bytes<LittleEndian>],
216 tags: &'a [U32Bytes<LittleEndian>],
217 #[expect(
218 dead_code,
219 reason = "Will be used in subsequent PR for Wasm exception handling"
220 )]
221 contexts: &'a [U32Bytes<LittleEndian>],
222 handlers: &'a [U32Bytes<LittleEndian>],
223}
224
225impl<'a> ExceptionTable<'a> {
226 /// Parse exception tables from a byte-slice as produced by
227 /// [`ExceptionTableBuilder::serialize`].
228 pub fn parse(data: &'a [u8]) -> anyhow::Result<ExceptionTable<'a>> {
229 let mut data = Bytes(data);
230 let callsite_count = data
231 .read::<U32Bytes<LittleEndian>>()
232 .map_err(|_| anyhow::anyhow!("Unable to read callsite count prefix"))?;
233 let callsite_count = usize::try_from(callsite_count.get(LittleEndian))?;
234 let handler_count = data
235 .read::<U32Bytes<LittleEndian>>()
236 .map_err(|_| anyhow::anyhow!("Unable to read handler count prefix"))?;
237 let handler_count = usize::try_from(handler_count.get(LittleEndian))?;
238 let (callsites, data) =
239 object::slice_from_bytes::<U32Bytes<LittleEndian>>(data.0, callsite_count)
240 .map_err(|_| anyhow::anyhow!("Unable to read callsites slice"))?;
241 let (ranges, data) =
242 object::slice_from_bytes::<U32Bytes<LittleEndian>>(data, callsite_count)
243 .map_err(|_| anyhow::anyhow!("Unable to read ranges slice"))?;
244 let (tags, data) = object::slice_from_bytes::<U32Bytes<LittleEndian>>(data, handler_count)
245 .map_err(|_| anyhow::anyhow!("Unable to read tags slice"))?;
246 let (contexts, data) =
247 object::slice_from_bytes::<U32Bytes<LittleEndian>>(data, handler_count)
248 .map_err(|_| anyhow::anyhow!("Unable to read contexts slice"))?;
249 let (handlers, data) =
250 object::slice_from_bytes::<U32Bytes<LittleEndian>>(data, handler_count)
251 .map_err(|_| anyhow::anyhow!("Unable to read handlers slice"))?;
252
253 if !data.is_empty() {
254 anyhow::bail!("Unexpected data at end of serialized exception table");
255 }
256
257 Ok(ExceptionTable {
258 callsites,
259 ranges,
260 tags,
261 contexts,
262 handlers,
263 })
264 }
265
266 /// Look up the handler destination, if any, for a given return
267 /// address (as an offset into the code section) and exception
268 /// tag.
269 ///
270 /// Note: we use raw `u32` types for code offsets and tags here to
271 /// avoid dependencies on `cranelift-codegen` when this crate is
272 /// built without compiler backend support (runtime-only config).
273 pub fn lookup(&self, pc: u32, tag: u32) -> Option<u32> {
274 // First, look up the callsite in the sorted callsites list.
275 let callsite_idx = self
276 .callsites
277 .binary_search_by_key(&pc, |callsite| callsite.get(LittleEndian))
278 .ok()?;
279 // Now get the range.
280 let end_idx = self.ranges[callsite_idx].get(LittleEndian);
281 let start_idx = if callsite_idx > 0 {
282 self.ranges[callsite_idx - 1].get(LittleEndian)
283 } else {
284 0
285 };
286
287 // Take the subslices of `tags` and `handlers` corresponding
288 // to this callsite.
289 let start_idx = usize::try_from(start_idx).unwrap();
290 let end_idx = usize::try_from(end_idx).unwrap();
291 let tags = &self.tags[start_idx..end_idx];
292 let handlers = &self.handlers[start_idx..end_idx];
293
294 // Is there any handler with an exact tag match?
295 if let Ok(handler_idx) = tags.binary_search_by_key(&tag, |tag| tag.get(LittleEndian)) {
296 return Some(handlers[handler_idx].get(LittleEndian));
297 }
298
299 // If not, is there a fallback handler? Note that we serialize
300 // it with the tag `u32::MAX`, so it is always last in sorted
301 // order.
302 if tags.last().map(|v| v.get(LittleEndian)) == Some(u32::MAX) {
303 return Some(handlers.last().unwrap().get(LittleEndian));
304 }
305
306 None
307 }
308}
309
#[cfg(all(test, feature = "cranelift"))]
mod test {
    use super::*;
    use cranelift_codegen::entity::EntityRef;
    use cranelift_codegen::ir::ExceptionTag;

    /// Builds a table for one function placed at offset 0x100,
    /// round-trips it through the serialized form, and checks lookups
    /// for tagged, catch-all, and handler-less callsites.
    #[test]
    fn serialize_exception_table() {
        let tagged_with_fallback = [
            FinalizedMachExceptionHandler::Tag(ExceptionTag::new(1), 0x20),
            FinalizedMachExceptionHandler::Tag(ExceptionTag::new(2), 0x30),
            FinalizedMachExceptionHandler::Default(0x40),
        ];
        let fallback_only = [FinalizedMachExceptionHandler::Default(0x60)];

        let call_sites = [
            FinalizedMachCallSite {
                ret_addr: 0x10,
                exception_handlers: &tagged_with_fallback,
            },
            // A callsite without handlers must not appear in the table.
            FinalizedMachCallSite {
                ret_addr: 0x48,
                exception_handlers: &[],
            },
            FinalizedMachCallSite {
                ret_addr: 0x50,
                exception_handlers: &fallback_only,
            },
        ];

        let mut builder = ExceptionTableBuilder::default();
        builder.add_func(0x100, call_sites.into_iter()).unwrap();
        let bytes = builder.to_vec();

        let table = ExceptionTable::parse(&bytes).unwrap();

        // (return address, tag, expected handler); all offsets are
        // shifted by the function's start offset of 0x100.
        let cases = [
            (0x148, 1, None),
            (0x110, 1, Some(0x120)),
            (0x110, 2, Some(0x130)),
            (0x110, 42, Some(0x140)),
            (0x150, 100, Some(0x160)),
        ];
        for (pc, tag, expected) in cases {
            assert_eq!(table.lookup(pc, tag), expected);
        }
    }
}