wasmtime_environ/address_map.rs
//! Data structures for mapping offsets in compiled code back to positions in
//! the original wasm source file.
2
3use crate::obj::ELF_WASMTIME_ADDRMAP;
4use object::write::{Object, StandardSegment};
5use object::{Bytes, LittleEndian, SectionKind, U32Bytes};
6use serde::{Deserialize, Serialize};
7use std::convert::TryFrom;
8use std::ops::Range;
9
/// Single source location to generated address mapping.
///
/// Each value pairs one position in the original wasm binary (`srcloc`) with
/// the offset of the corresponding compiled code within its function
/// (`code_offset`).
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
pub struct InstructionAddressMap {
    /// Where in the source wasm binary this instruction comes from, specified
    /// as an offset in bytes from the front of the file.
    pub srcloc: FilePos,

    /// Offset from the start of the function's compiled code to where this
    /// instruction is located, or the region where it starts.
    pub code_offset: u32,
}
21
/// A position within an original source file.
///
/// This structure is used as a newtype wrapper around a 32-bit integer which
/// represents an offset within a file where a wasm instruction or function is
/// to be originally found.
///
/// The value `u32::MAX` is reserved: it is what the `Default` implementation
/// stores, and `file_offset` reports it as `None`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct FilePos(u32);
29
30impl FilePos {
31 /// Create a new file position with the given offset.
32 pub fn new(pos: u32) -> FilePos {
33 assert!(pos != u32::MAX);
34 FilePos(pos)
35 }
36
37 /// Returns the offset that this offset was created with.
38 ///
39 /// Note that the `Default` implementation will return `None` here, whereas
40 /// positions created with `FilePos::new` will return `Some`.
41 pub fn file_offset(self) -> Option<u32> {
42 if self.0 == u32::MAX {
43 None
44 } else {
45 Some(self.0)
46 }
47 }
48}
49
50impl Default for FilePos {
51 fn default() -> FilePos {
52 FilePos(u32::MAX)
53 }
54}
55
/// Builder for the address map section of a wasmtime compilation image.
///
/// This builder is used to conveniently build the `ELF_WASMTIME_ADDRMAP`
/// section by compilers, and provides utilities to directly insert the results
/// into an `Object`.
#[derive(Default)]
pub struct AddressMapSection {
    /// Text-section-relative code offsets, one per pushed instruction mapping,
    /// stored as little-endian 32-bit values.
    offsets: Vec<U32Bytes<LittleEndian>>,
    /// Raw source positions, parallel to `offsets` (same index, same length).
    positions: Vec<U32Bytes<LittleEndian>>,
    /// Lowest offset the next pushed entry may have; `push` asserts against it
    /// to keep `offsets` sorted.
    last_offset: u32,
}
67
68impl AddressMapSection {
69 /// Pushes a new set of instruction mapping information for a function added
70 /// in the exectuable.
71 ///
72 /// The `func` argument here is the range of the function, relative to the
73 /// start of the text section in the executable. The `instrs` provided are
74 /// the descriptors for instructions in the function and their various
75 /// mappings back to original source positions.
76 ///
77 /// This is required to be called for `func` values that are strictly
78 /// increasing in addresses (e.g. as the object is built). Additionally the
79 /// `instrs` map must be sorted based on code offset in the native text
80 /// section.
81 pub fn push(&mut self, func: Range<u64>, instrs: &[InstructionAddressMap]) {
82 // NB: for now this only supports <=4GB text sections in object files.
83 // Alternative schemes will need to be created for >32-bit offsets to
84 // avoid making this section overly large.
85 let func_start = u32::try_from(func.start).unwrap();
86 let func_end = u32::try_from(func.end).unwrap();
87
88 self.offsets.reserve(instrs.len());
89 self.positions.reserve(instrs.len());
90 for map in instrs {
91 // Sanity-check to ensure that functions are pushed in-order, otherwise
92 // the `offsets` array won't be sorted which is our goal.
93 let pos = func_start + map.code_offset;
94 assert!(pos >= self.last_offset);
95 self.offsets.push(U32Bytes::new(LittleEndian, pos));
96 self.positions
97 .push(U32Bytes::new(LittleEndian, map.srcloc.0));
98 self.last_offset = pos;
99 }
100 self.last_offset = func_end;
101 }
102
103 /// Finishes encoding this section into the `Object` provided.
104 pub fn append_to(self, obj: &mut Object) {
105 let section = obj.add_section(
106 obj.segment_name(StandardSegment::Data).to_vec(),
107 ELF_WASMTIME_ADDRMAP.as_bytes().to_vec(),
108 SectionKind::ReadOnlyData,
109 );
110
111 // NB: this matches the encoding expected by `lookup` below.
112 let amt = u32::try_from(self.offsets.len()).unwrap();
113 obj.append_section_data(section, &amt.to_le_bytes(), 1);
114 obj.append_section_data(section, object::bytes_of_slice(&self.offsets), 1);
115 obj.append_section_data(section, object::bytes_of_slice(&self.positions), 1);
116 }
117}
118
119/// Parse an `ELF_WASMTIME_ADDRMAP` section, returning the slice of code offsets
120/// and the slice of associated file positions for each offset.
121fn parse_address_map(
122 section: &[u8],
123) -> Option<(&[U32Bytes<LittleEndian>], &[U32Bytes<LittleEndian>])> {
124 let mut section = Bytes(section);
125 // NB: this matches the encoding written by `append_to` above.
126 let count = section.read::<U32Bytes<LittleEndian>>().ok()?;
127 let count = usize::try_from(count.get(LittleEndian)).ok()?;
128 let (offsets, section) =
129 object::slice_from_bytes::<U32Bytes<LittleEndian>>(section.0, count).ok()?;
130 let (positions, section) =
131 object::slice_from_bytes::<U32Bytes<LittleEndian>>(section, count).ok()?;
132 debug_assert!(section.is_empty());
133 Some((offsets, positions))
134}
135
136/// Lookup an `offset` within an encoded address map section, returning the
137/// original `FilePos` that corresponds to the offset, if found.
138///
139/// This function takes a `section` as its first argument which must have been
140/// created with `AddressMapSection` above. This is intended to be the raw
141/// `ELF_WASMTIME_ADDRMAP` section from the compilation artifact.
142///
143/// The `offset` provided is a relative offset from the start of the text
144/// section of the pc that is being looked up. If `offset` is out of range or
145/// doesn't correspond to anything in this file then `None` is returned.
146pub fn lookup_file_pos(section: &[u8], offset: usize) -> Option<FilePos> {
147 let (offsets, positions) = parse_address_map(section)?;
148
149 // First perform a binary search on the `offsets` array. This is a sorted
150 // array of offsets within the text section, which is conveniently what our
151 // `offset` also is. Note that we are somewhat unlikely to find a precise
152 // match on the element in the array, so we're largely interested in which
153 // "bucket" the `offset` falls into.
154 let offset = u32::try_from(offset).ok()?;
155 let index = match offsets.binary_search_by_key(&offset, |v| v.get(LittleEndian)) {
156 // Exact hit!
157 Ok(i) => i,
158
159 // This *would* be at the first slot in the array, so no
160 // instructions cover `pc`.
161 Err(0) => return None,
162
163 // This would be at the `nth` slot, so we're at the `n-1`th slot.
164 Err(n) => n - 1,
165 };
166
167 // Using the `index` we found of which bucket `offset` corresponds to we can
168 // lookup the actual `FilePos` value in the `positions` array.
169 let pos = positions.get(index)?;
170 Some(FilePos(pos.get(LittleEndian)))
171}
172
173/// Iterate over the address map contained in the given address map section.
174///
175/// This function takes a `section` as its first argument which must have been
176/// created with `AddressMapSection` above. This is intended to be the raw
177/// `ELF_WASMTIME_ADDRMAP` section from the compilation artifact.
178///
179/// The yielded offsets are relative to the start of the text section for this
180/// map's code object.
181pub fn iterate_address_map<'a>(
182 section: &'a [u8],
183) -> Option<impl Iterator<Item = (u32, FilePos)> + 'a> {
184 let (offsets, positions) = parse_address_map(section)?;
185
186 Some(
187 offsets
188 .iter()
189 .map(|o| o.get(LittleEndian))
190 .zip(positions.iter().map(|pos| FilePos(pos.get(LittleEndian)))),
191 )
192}