1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192
//! Data structures to provide transformation of the source
use crate::obj::ELF_WASMTIME_ADDRMAP;
use object::write::{Object, StandardSegment};
use object::{Bytes, LittleEndian, SectionKind, U32Bytes};
use serde::{Deserialize, Serialize};
use std::convert::TryFrom;
use std::ops::Range;
/// Single source location to generated address mapping.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
pub struct InstructionAddressMap {
/// Where in the source wasm binary this instruction comes from, specified
/// in an offset of bytes from the front of the file.
pub srcloc: FilePos,
/// Offset from the start of the function's compiled code to where this
/// instruction is located, or the region where it starts.
pub code_offset: u32,
}
/// A position within an original source file,
///
/// This structure is used as a newtype wrapper around a 32-bit integer which
/// represents an offset within a file where a wasm instruction or function is
/// to be originally found.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct FilePos(u32);
impl FilePos {
/// Create a new file position with the given offset.
pub fn new(pos: u32) -> FilePos {
assert!(pos != u32::MAX);
FilePos(pos)
}
/// Returns the offset that this offset was created with.
///
/// Note that the `Default` implementation will return `None` here, whereas
/// positions created with `FilePos::new` will return `Some`.
pub fn file_offset(self) -> Option<u32> {
if self.0 == u32::MAX {
None
} else {
Some(self.0)
}
}
}
impl Default for FilePos {
fn default() -> FilePos {
FilePos(u32::MAX)
}
}
/// Builder for the address map section of a wasmtime compilation image.
///
/// This builder is used to conveniently built the `ELF_WASMTIME_ADDRMAP`
/// section by compilers, and provides utilities to directly insert the results
/// into an `Object`.
#[derive(Default)]
pub struct AddressMapSection {
offsets: Vec<U32Bytes<LittleEndian>>,
positions: Vec<U32Bytes<LittleEndian>>,
last_offset: u32,
}
impl AddressMapSection {
/// Pushes a new set of instruction mapping information for a function added
/// in the exectuable.
///
/// The `func` argument here is the range of the function, relative to the
/// start of the text section in the executable. The `instrs` provided are
/// the descriptors for instructions in the function and their various
/// mappings back to original source positions.
///
/// This is required to be called for `func` values that are strictly
/// increasing in addresses (e.g. as the object is built). Additionally the
/// `instrs` map must be sorted based on code offset in the native text
/// section.
pub fn push(&mut self, func: Range<u64>, instrs: &[InstructionAddressMap]) {
// NB: for now this only supports <=4GB text sections in object files.
// Alternative schemes will need to be created for >32-bit offsets to
// avoid making this section overly large.
let func_start = u32::try_from(func.start).unwrap();
let func_end = u32::try_from(func.end).unwrap();
self.offsets.reserve(instrs.len());
self.positions.reserve(instrs.len());
for map in instrs {
// Sanity-check to ensure that functions are pushed in-order, otherwise
// the `offsets` array won't be sorted which is our goal.
let pos = func_start + map.code_offset;
assert!(pos >= self.last_offset);
self.offsets.push(U32Bytes::new(LittleEndian, pos));
self.positions
.push(U32Bytes::new(LittleEndian, map.srcloc.0));
self.last_offset = pos;
}
self.last_offset = func_end;
}
/// Finishes encoding this section into the `Object` provided.
pub fn append_to(self, obj: &mut Object) {
let section = obj.add_section(
obj.segment_name(StandardSegment::Data).to_vec(),
ELF_WASMTIME_ADDRMAP.as_bytes().to_vec(),
SectionKind::ReadOnlyData,
);
// NB: this matches the encoding expected by `lookup` below.
let amt = u32::try_from(self.offsets.len()).unwrap();
obj.append_section_data(section, &amt.to_le_bytes(), 1);
obj.append_section_data(section, object::bytes_of_slice(&self.offsets), 1);
obj.append_section_data(section, object::bytes_of_slice(&self.positions), 1);
}
}
/// Parse an `ELF_WASMTIME_ADDRMAP` section, returning the slice of code offsets
/// and the slice of associated file positions for each offset.
fn parse_address_map(
section: &[u8],
) -> Option<(&[U32Bytes<LittleEndian>], &[U32Bytes<LittleEndian>])> {
let mut section = Bytes(section);
// NB: this matches the encoding written by `append_to` above.
let count = section.read::<U32Bytes<LittleEndian>>().ok()?;
let count = usize::try_from(count.get(LittleEndian)).ok()?;
let (offsets, section) =
object::slice_from_bytes::<U32Bytes<LittleEndian>>(section.0, count).ok()?;
let (positions, section) =
object::slice_from_bytes::<U32Bytes<LittleEndian>>(section, count).ok()?;
debug_assert!(section.is_empty());
Some((offsets, positions))
}
/// Lookup an `offset` within an encoded address map section, returning the
/// original `FilePos` that corresponds to the offset, if found.
///
/// This function takes a `section` as its first argument which must have been
/// created with `AddressMapSection` above. This is intended to be the raw
/// `ELF_WASMTIME_ADDRMAP` section from the compilation artifact.
///
/// The `offset` provided is a relative offset from the start of the text
/// section of the pc that is being looked up. If `offset` is out of range or
/// doesn't correspond to anything in this file then `None` is returned.
pub fn lookup_file_pos(section: &[u8], offset: usize) -> Option<FilePos> {
let (offsets, positions) = parse_address_map(section)?;
// First perform a binary search on the `offsets` array. This is a sorted
// array of offsets within the text section, which is conveniently what our
// `offset` also is. Note that we are somewhat unlikely to find a precise
// match on the element in the array, so we're largely interested in which
// "bucket" the `offset` falls into.
let offset = u32::try_from(offset).ok()?;
let index = match offsets.binary_search_by_key(&offset, |v| v.get(LittleEndian)) {
// Exact hit!
Ok(i) => i,
// This *would* be at the first slot in the array, so no
// instructions cover `pc`.
Err(0) => return None,
// This would be at the `nth` slot, so we're at the `n-1`th slot.
Err(n) => n - 1,
};
// Using the `index` we found of which bucket `offset` corresponds to we can
// lookup the actual `FilePos` value in the `positions` array.
let pos = positions.get(index)?;
Some(FilePos(pos.get(LittleEndian)))
}
/// Iterate over the address map contained in the given address map section.
///
/// This function takes a `section` as its first argument which must have been
/// created with `AddressMapSection` above. This is intended to be the raw
/// `ELF_WASMTIME_ADDRMAP` section from the compilation artifact.
///
/// The yielded offsets are relative to the start of the text section for this
/// map's code object.
pub fn iterate_address_map<'a>(
section: &'a [u8],
) -> Option<impl Iterator<Item = (u32, FilePos)> + 'a> {
let (offsets, positions) = parse_address_map(section)?;
Some(
offsets
.iter()
.map(|o| o.get(LittleEndian))
.zip(positions.iter().map(|pos| FilePos(pos.get(LittleEndian)))),
)
}